ms-sequest 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/History CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.0.10 / 2009-12-03
2
+
3
+ * turned off warning if print_duplicate_references == 0
4
+
1
5
  == 0.0.9 / 2009-09-08
2
6
 
3
7
  * added capability to read srf files created by reading in .out/.dta folders (combined).
data/bin/srf_to_search.rb CHANGED
@@ -1,19 +1,64 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/ruby
2
2
 
3
3
  require 'rubygems'
4
- require 'tap/task'
4
+ require 'optparse'
5
5
  require 'ms/sequest/srf/search'
6
6
 
7
+ opt = {
8
+ :format => 'mgf'
9
+ }
10
+
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
13
+ op.separator "outputs: <file>.mgf"
14
+ op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
15
+ end
16
+
7
17
  if ARGV.size == 0
8
- ARGV << "--help"
18
+ puts opts
19
+ exit
9
20
  end
10
21
 
22
+ format = opt[:format]
23
+
24
+ ARGV.each do |srf_file|
25
+ base = srf_file.sub(/\.srf$/i, '')
26
+ newfile =
27
+ case format
28
+ when 'dta'
29
+ base
30
+ when 'mgf'
31
+ base << '.' << format
32
+ end
33
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
34
+ # options just speed up reading since we don't need .out info anyway
35
+ case format
36
+ when 'mgf'
37
+ srf.to_mgf(newfile)
38
+ when 'dta'
39
+ srf.to_dta_files(newfile)
40
+ end
41
+ end
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+ =begin
54
+
55
+ #require 'tap/task'
11
56
  task_class = Ms::Sequest::Srf::SrfToSearch
12
57
 
13
58
  parser = ConfigParser.new do |opts|
14
59
  opts.separator "configurations"
15
60
  opts.add task_class.configurations
16
-
61
+
17
62
  opts.on "--help", "Print this help" do
18
63
  puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
19
64
  puts
@@ -23,11 +68,11 @@ parser = ConfigParser.new do |opts|
23
68
  end
24
69
 
25
70
  parser.parse!(ARGV)
26
-
71
+
27
72
  task = task_class.new(parser.config)
28
73
 
29
74
  ARGV.each do |file|
30
75
  task.execute(file)
31
76
  end
32
77
 
33
-
78
+ =end
data/lib/ms/sequest.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.9'
4
+ VERSION = '0.0.10'
5
5
  end
6
6
  end
@@ -2,6 +2,7 @@
2
2
  # standard lib
3
3
  require 'set'
4
4
  require 'fileutils'
5
+ require 'scanf'
5
6
 
6
7
  # other gems
7
8
  require 'arrayclass'
@@ -12,14 +13,15 @@ require 'ms/id/protein'
12
13
  require 'ms/id/search'
13
14
  require 'ms/sequest/params'
14
15
 
15
- # for conversions
16
- require 'ms/sequest/srf/search'
17
- require 'ms/sequest/srf/sqt'
18
16
 
19
17
  module Ms ; end
20
18
  module Ms::Sequest ; end
21
19
 
22
20
  class Ms::Sequest::Srf
21
+
22
+ class NoSequestParamsError < ArgumentError
23
+ end
24
+
23
25
  include Ms::Id::Search
24
26
 
25
27
  # inherits peps and prots from Search
@@ -70,6 +72,7 @@ class Ms::Sequest::Srf
70
72
  end
71
73
  end
72
74
 
75
+
73
76
  # opts:
74
77
  # :filter_by_precursor_mass_tolerance => true | false (default true)
75
78
  # # this will filter by the sequest params prec tolerance as is
@@ -87,6 +90,9 @@ class Ms::Sequest::Srf
87
90
  # :read_pephits => true | false (default true)
88
91
  # # will attempt to read peptide hit information (equivalent to .out
89
92
  # # files), otherwise, just reads the dta information.
93
+ #
94
+ # :params => <path/to/sequest.params> Some srf files do not include
95
+ # their sequest params file - include it here if necessary.
90
96
  def initialize(filename=nil, opts={})
91
97
  @peps = []
92
98
 
@@ -164,33 +170,38 @@ class Ms::Sequest::Srf
164
170
  # opts are the same as for 'new'
165
171
  def from_file(filename, opts)
166
172
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
167
- params = Ms::Sequest::Srf.get_sequest_params(filename)
173
+
174
+ @params =
175
+ if opts[:params]
176
+ Ms::Sequest::Params.new(opts[:params])
177
+ else
178
+ Ms::Sequest::Srf.get_sequest_params(filename)
179
+ end
180
+
168
181
  dup_references = 0
169
182
  dup_refs_gt_0 = false
170
- if params
171
- dup_references = params.print_duplicate_references.to_i
183
+ if @params
184
+ dup_references = @params.print_duplicate_references.to_i
172
185
  if dup_references == 0
173
- warn <<END
174
- *****************************************************************************
175
- WARNING: This srf file lists only 1 protein per peptide! (based on the
176
- print_duplicate_references parameter in the sequest.params file used in its
177
- creation) So, downstream output will likewise only contain a single protein
178
- for each peptide hit. In many instances this is OK since downstream programs
179
- will recalculate protein-to-peptide linkages from the database file anyway.
180
- For complete protein lists per peptide hit, .srf files must be created with
181
- print_duplicate_references > 0. HINT: to capture all duplicate references,
182
- set the sequest parameter 'print_duplicate_references' to 100 or greater.
183
- *****************************************************************************
184
- END
186
+ # warn %Q{
187
+ #*****************************************************************************
188
+ #WARNING: This srf file lists only 1 protein per peptide! (based on the
189
+ #print_duplicate_references parameter in the sequest.params file used in its
190
+ #creation) So, downstream output will likewise only contain a single protein
191
+ #for each peptide hit. In many instances this is OK since downstream programs
192
+ #will recalculate protein-to-peptide linkages from the database file anyway.
193
+ #For complete protein lists per peptide hit, .srf files must be created with
194
+ #print_duplicate_references > 0. HINT: to capture all duplicate references,
195
+ #set the sequest parameter 'print_duplicate_references' to 100 or greater.
196
+ #*****************************************************************************
197
+ # }
185
198
  else
186
199
  dup_refs_gt_0 = true
187
200
  end
188
- else
189
- warn "no params file found in srf, could be combined file or truncated/corrupt file!"
190
201
  end
191
202
 
192
203
  File.open(filename, 'rb') do |fh|
193
- @header = Ms::Sequest::Srf::Header.new.from_io(fh)
204
+ @header = Ms::Sequest::Srf::Header.new.from_io(fh)
194
205
  @version = @header.version
195
206
 
196
207
  unpack_35 = case @version
@@ -215,6 +226,8 @@ END
215
226
  @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
216
227
  @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
217
228
  if opts[:read_pephits]
229
+ # need the params file to know whether print_duplicate_references is set > 0
230
+ raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
218
231
  @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
219
232
  if fh.eof?
220
233
  #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
@@ -228,38 +241,44 @@ END
228
241
  if @params.nil?
229
242
  fh.pos = start_pos_in_case
230
243
  # seek to the index
231
- fh.scanf "\000\000\000\000"
244
+ fh.scanf "\000\000\000\000" do |m|
245
+ puts "MATCHING NULLS: "
246
+ p m
247
+ end
248
+ warn "no params file, no index, corrupt file"
232
249
  else # we have a params file
233
250
  # This is very sensitive to the grab_params method in sequest params
234
251
  fh.read(12) ## gap between last params entry and index
235
252
  end
236
253
  @index = read_scan_index(fh,@header.num_dta_files)
237
- #p @index
238
254
  end
239
255
 
240
256
 
241
257
  ### UPDATE SOME THINGS:
242
258
  # give each hit a base_name, first_scan, last_scan
243
- @index.each_with_index do |ind,i|
244
- mass_measured = @dta_files[i][0]
245
- @out_files[i][0,3] = *ind
246
- pep_hits = @out_files[i][6]
247
- @peps.push( *pep_hits )
248
- pep_hits.each do |pep_hit|
249
- pep_hit[14,4] = @base_name, *ind
250
- # add the deltamass
251
- pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
252
- pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
253
- pep_hit[18] = self ## link with the srf object
259
+ if opts[:read_pephits] && !@header.combined
260
+ @index.each_with_index do |ind,i|
261
+ mass_measured = @dta_files[i][0]
262
+ @out_files[i][0,3] = *ind
263
+ pep_hits = @out_files[i][6]
264
+ @peps.push( *pep_hits )
265
+ pep_hits.each do |pep_hit|
266
+ pep_hit[14,4] = @base_name, *ind
267
+ # add the deltamass
268
+ pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
269
+ pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
270
+ pep_hit[18] = self ## link with the srf object
271
+ end
254
272
  end
255
- end
256
273
 
257
- filter_by_precursor_mass_tolerance! if params
274
+ filter_by_precursor_mass_tolerance! if params
258
275
 
259
- if opts[:link_protein_hits]
260
- (@peps, @prots) = merge!([peps]) do |_prot, _peps|
261
- prot = Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
276
+ if opts[:link_protein_hits]
277
+ (@peps, @prots) = merge!([peps]) do |_prot, _peps|
278
+ prot = Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
279
+ end
262
280
  end
281
+
263
282
  end
264
283
 
265
284
  self
@@ -761,3 +780,4 @@ end
761
780
 
762
781
 
763
782
 
783
+
@@ -1,9 +1,7 @@
1
1
 
2
- require 'tap/task'
3
2
  require 'ms/sequest/srf'
4
3
  require 'ms/mass'
5
4
 
6
-
7
5
  # These are for outputting formats used in MS/MS Search engines
8
6
 
9
7
  module Ms
@@ -101,35 +99,6 @@ module Ms
101
99
  end
102
100
  end
103
101
 
104
- # Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
105
- # searching
106
- #
107
- # outputs the appropriate file or directory structure for <file>.srf:
108
- # <file>.mgf # file for mgf
109
- # <file> # the basename directory for dta
110
- class SrfToSearch < Tap::Task
111
- config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
112
- def process(srf_file)
113
- base = srf_file.sub(/\.srf$/i, '')
114
- newfile =
115
- case format
116
- when 'dta'
117
- base
118
- when 'mgf'
119
- base << '.' << format
120
- end
121
- srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
122
- # options just speed up reading since we don't need .out info anyway
123
- case format
124
- when 'mgf'
125
- srf.to_mgf(newfile)
126
- when 'dta'
127
- srf.to_dta_files(newfile)
128
- end
129
- end
130
- end
131
-
132
-
133
102
  end # Srf
134
103
  end # Sequest
135
104
  end # Ms
@@ -0,0 +1,39 @@
1
+ require 'tap/task'
2
+
3
+ module Ms
4
+ module Sequest
5
+ class Srf
6
+
7
+ # Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
8
+ # searching
9
+ #
10
+ # outputs the appropriate file or directory structure for <file>.srf:
11
+ # <file>.mgf # file for mgf
12
+ # <file> # the basename directory for dta
13
+ class SrfToSearch < Tap::Task
14
+ config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
15
+ def process(srf_file)
16
+ base = srf_file.sub(/\.srf$/i, '')
17
+ newfile =
18
+ case format
19
+ when 'dta'
20
+ base
21
+ when 'mgf'
22
+ base << '.' << format
23
+ end
24
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
25
+ # options just speed up reading since we don't need .out info anyway
26
+ case format
27
+ when 'mgf'
28
+ srf.to_mgf(newfile)
29
+ when 'dta'
30
+ srf.to_dta_files(newfile)
31
+ end
32
+ end
33
+ end
34
+
35
+
36
+ end # Srf
37
+ end # Sequest
38
+ end # Ms
39
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-08 00:00:00 -06:00
12
+ date: 2009-12-03 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -67,6 +67,7 @@ files:
67
67
  - lib/ms/sequest.rb
68
68
  - lib/ms/sequest/sqt.rb
69
69
  - lib/ms/sequest/params.rb
70
+ - lib/ms/sequest/srf/search/tap.rb
70
71
  - lib/ms/sequest/srf/sqt.rb
71
72
  - lib/ms/sequest/srf/search.rb
72
73
  - lib/ms/sequest/srf.rb
@@ -102,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
102
103
  requirements: []
103
104
 
104
105
  rubyforge_project: mspire
105
- rubygems_version: 1.3.2
106
+ rubygems_version: 1.3.5
106
107
  signing_key:
107
108
  specification_version: 3
108
109
  summary: An mspire library supporting SEQUEST, Bioworks, SQT, etc