ms-sequest 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.0.10 / 2009-12-03
2
+
3
+ * turned off the warning that was emitted when print_duplicate_references == 0
4
+
1
5
  == 0.0.9 / 2009-09-08
2
6
 
3
7
  * added capability to read srf files created by reading in .out/.dta folders (combined).
data/bin/srf_to_search.rb CHANGED
@@ -1,19 +1,64 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/ruby
2
2
 
3
3
  require 'rubygems'
4
- require 'tap/task'
4
+ require 'optparse'
5
5
  require 'ms/sequest/srf/search'
6
6
 
7
+ opt = {
8
+ :format => 'mgf'
9
+ }
10
+
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
13
+ op.separator "outputs: <file>.mgf"
14
+ op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
15
+ end
16
+
7
17
  if ARGV.size == 0
8
- ARGV << "--help"
18
+ puts opts
19
+ exit
9
20
  end
10
21
 
22
+ format = opt[:format]
23
+
24
+ ARGV.each do |srf_file|
25
+ base = srf_file.sub(/\.srf$/i, '')
26
+ newfile =
27
+ case format
28
+ when 'dta'
29
+ base
30
+ when 'mgf'
31
+ base << '.' << format
32
+ end
33
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
34
+ # options just speed up reading since we don't need .out info anyway
35
+ case format
36
+ when 'mgf'
37
+ srf.to_mgf(newfile)
38
+ when 'dta'
39
+ srf.to_dta_files(newfile)
40
+ end
41
+ end
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+ =begin
54
+
55
+ #require 'tap/task'
11
56
  task_class = Ms::Sequest::Srf::SrfToSearch
12
57
 
13
58
  parser = ConfigParser.new do |opts|
14
59
  opts.separator "configurations"
15
60
  opts.add task_class.configurations
16
-
61
+
17
62
  opts.on "--help", "Print this help" do
18
63
  puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
19
64
  puts
@@ -23,11 +68,11 @@ parser = ConfigParser.new do |opts|
23
68
  end
24
69
 
25
70
  parser.parse!(ARGV)
26
-
71
+
27
72
  task = task_class.new(parser.config)
28
73
 
29
74
  ARGV.each do |file|
30
75
  task.execute(file)
31
76
  end
32
77
 
33
-
78
+ =end
data/lib/ms/sequest.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.9'
4
+ VERSION = '0.0.10'
5
5
  end
6
6
  end
@@ -2,6 +2,7 @@
2
2
  # standard lib
3
3
  require 'set'
4
4
  require 'fileutils'
5
+ require 'scanf'
5
6
 
6
7
  # other gems
7
8
  require 'arrayclass'
@@ -12,14 +13,15 @@ require 'ms/id/protein'
12
13
  require 'ms/id/search'
13
14
  require 'ms/sequest/params'
14
15
 
15
- # for conversions
16
- require 'ms/sequest/srf/search'
17
- require 'ms/sequest/srf/sqt'
18
16
 
19
17
  module Ms ; end
20
18
  module Ms::Sequest ; end
21
19
 
22
20
  class Ms::Sequest::Srf
21
+
22
+ class NoSequestParamsError < ArgumentError
23
+ end
24
+
23
25
  include Ms::Id::Search
24
26
 
25
27
  # inherits peps and prots from Search
@@ -70,6 +72,7 @@ class Ms::Sequest::Srf
70
72
  end
71
73
  end
72
74
 
75
+
73
76
  # opts:
74
77
  # :filter_by_precursor_mass_tolerance => true | false (default true)
75
78
  # # this will filter by the sequest params prec tolerance as is
@@ -87,6 +90,9 @@ class Ms::Sequest::Srf
87
90
  # :read_pephits => true | false (default true)
88
91
  # # will attempt to read peptide hit information (equivalent to .out
89
92
  # # files), otherwise, just reads the dta information.
93
+ #
94
+ # :params => <path/to/sequest.params> Some srf files do not include
95
+ # their sequest params file - include it here if necessary.
90
96
  def initialize(filename=nil, opts={})
91
97
  @peps = []
92
98
 
@@ -164,33 +170,38 @@ class Ms::Sequest::Srf
164
170
  # opts are the same as for 'new'
165
171
  def from_file(filename, opts)
166
172
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
167
- params = Ms::Sequest::Srf.get_sequest_params(filename)
173
+
174
+ @params =
175
+ if opts[:params]
176
+ Ms::Sequest::Params.new(opts[:params])
177
+ else
178
+ Ms::Sequest::Srf.get_sequest_params(filename)
179
+ end
180
+
168
181
  dup_references = 0
169
182
  dup_refs_gt_0 = false
170
- if params
171
- dup_references = params.print_duplicate_references.to_i
183
+ if @params
184
+ dup_references = @params.print_duplicate_references.to_i
172
185
  if dup_references == 0
173
- warn <<END
174
- *****************************************************************************
175
- WARNING: This srf file lists only 1 protein per peptide! (based on the
176
- print_duplicate_references parameter in the sequest.params file used in its
177
- creation) So, downstream output will likewise only contain a single protein
178
- for each peptide hit. In many instances this is OK since downstream programs
179
- will recalculate protein-to-peptide linkages from the database file anyway.
180
- For complete protein lists per peptide hit, .srf files must be created with
181
- print_duplicate_references > 0. HINT: to capture all duplicate references,
182
- set the sequest parameter 'print_duplicate_references' to 100 or greater.
183
- *****************************************************************************
184
- END
186
+ # warn %Q{
187
+ #*****************************************************************************
188
+ #WARNING: This srf file lists only 1 protein per peptide! (based on the
189
+ #print_duplicate_references parameter in the sequest.params file used in its
190
+ #creation) So, downstream output will likewise only contain a single protein
191
+ #for each peptide hit. In many instances this is OK since downstream programs
192
+ #will recalculate protein-to-peptide linkages from the database file anyway.
193
+ #For complete protein lists per peptide hit, .srf files must be created with
194
+ #print_duplicate_references > 0. HINT: to capture all duplicate references,
195
+ #set the sequest parameter 'print_duplicate_references' to 100 or greater.
196
+ #*****************************************************************************
197
+ # }
185
198
  else
186
199
  dup_refs_gt_0 = true
187
200
  end
188
- else
189
- warn "no params file found in srf, could be combined file or truncated/corrupt file!"
190
201
  end
191
202
 
192
203
  File.open(filename, 'rb') do |fh|
193
- @header = Ms::Sequest::Srf::Header.new.from_io(fh)
204
+ @header = Ms::Sequest::Srf::Header.new.from_io(fh)
194
205
  @version = @header.version
195
206
 
196
207
  unpack_35 = case @version
@@ -215,6 +226,8 @@ END
215
226
  @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
216
227
  @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
217
228
  if opts[:read_pephits]
229
+ # need the params file to know if the duplicate_references is set > 0
230
+ raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
218
231
  @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
219
232
  if fh.eof?
220
233
  #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
@@ -228,38 +241,44 @@ END
228
241
  if @params.nil?
229
242
  fh.pos = start_pos_in_case
230
243
  # seek to the index
231
- fh.scanf "\000\000\000\000"
244
+ fh.scanf "\000\000\000\000" do |m|
245
+ puts "MATCHING NULLS: "
246
+ p m
247
+ end
248
+ warn "no params file, no index, corrupt file"
232
249
  else # we have a params file
233
250
  # This is very sensitive to the grab_params method in sequest params
234
251
  fh.read(12) ## gap between last params entry and index
235
252
  end
236
253
  @index = read_scan_index(fh,@header.num_dta_files)
237
- #p @index
238
254
  end
239
255
 
240
256
 
241
257
  ### UPDATE SOME THINGS:
242
258
  # give each hit a base_name, first_scan, last_scan
243
- @index.each_with_index do |ind,i|
244
- mass_measured = @dta_files[i][0]
245
- @out_files[i][0,3] = *ind
246
- pep_hits = @out_files[i][6]
247
- @peps.push( *pep_hits )
248
- pep_hits.each do |pep_hit|
249
- pep_hit[14,4] = @base_name, *ind
250
- # add the deltamass
251
- pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
252
- pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
253
- pep_hit[18] = self ## link with the srf object
259
+ if opts[:read_pephits] && !@header.combined
260
+ @index.each_with_index do |ind,i|
261
+ mass_measured = @dta_files[i][0]
262
+ @out_files[i][0,3] = *ind
263
+ pep_hits = @out_files[i][6]
264
+ @peps.push( *pep_hits )
265
+ pep_hits.each do |pep_hit|
266
+ pep_hit[14,4] = @base_name, *ind
267
+ # add the deltamass
268
+ pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
269
+ pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
270
+ pep_hit[18] = self ## link with the srf object
271
+ end
254
272
  end
255
- end
256
273
 
257
- filter_by_precursor_mass_tolerance! if params
274
+ filter_by_precursor_mass_tolerance! if params
258
275
 
259
- if opts[:link_protein_hits]
260
- (@peps, @prots) = merge!([peps]) do |_prot, _peps|
261
- prot = Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
276
+ if opts[:link_protein_hits]
277
+ (@peps, @prots) = merge!([peps]) do |_prot, _peps|
278
+ prot = Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
279
+ end
262
280
  end
281
+
263
282
  end
264
283
 
265
284
  self
@@ -761,3 +780,4 @@ end
761
780
 
762
781
 
763
782
 
783
+
@@ -1,9 +1,7 @@
1
1
 
2
- require 'tap/task'
3
2
  require 'ms/sequest/srf'
4
3
  require 'ms/mass'
5
4
 
6
-
7
5
  # These are for outputting formats used in MS/MS Search engines
8
6
 
9
7
  module Ms
@@ -101,35 +99,6 @@ module Ms
101
99
  end
102
100
  end
103
101
 
104
- # Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
105
- # searching
106
- #
107
- # outputs the appropriate file or directory structure for <file>.srf:
108
- # <file>.mgf # file for mgf
109
- # <file> # the basename directory for dta
110
- class SrfToSearch < Tap::Task
111
- config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
112
- def process(srf_file)
113
- base = srf_file.sub(/\.srf$/i, '')
114
- newfile =
115
- case format
116
- when 'dta'
117
- base
118
- when 'mgf'
119
- base << '.' << format
120
- end
121
- srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
122
- # options just speed up reading since we don't need .out info anyway
123
- case format
124
- when 'mgf'
125
- srf.to_mgf(newfile)
126
- when 'dta'
127
- srf.to_dta_files(newfile)
128
- end
129
- end
130
- end
131
-
132
-
133
102
  end # Srf
134
103
  end # Sequest
135
104
  end # Ms
@@ -0,0 +1,39 @@
1
+ require 'tap/task'
2
+
3
+ module Ms
4
+ module Sequest
5
+ class Srf
6
+
7
+ # Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
8
+ # searching
9
+ #
10
+ # outputs the appropriate file or directory structure for <file>.srf:
11
+ # <file>.mgf # file for mgf
12
+ # <file> # the basename directory for dta
13
+ class SrfToSearch < Tap::Task
14
+ config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
15
+ def process(srf_file)
16
+ base = srf_file.sub(/\.srf$/i, '')
17
+ newfile =
18
+ case format
19
+ when 'dta'
20
+ base
21
+ when 'mgf'
22
+ base << '.' << format
23
+ end
24
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
25
+ # options just speed up reading since we don't need .out info anyway
26
+ case format
27
+ when 'mgf'
28
+ srf.to_mgf(newfile)
29
+ when 'dta'
30
+ srf.to_dta_files(newfile)
31
+ end
32
+ end
33
+ end
34
+
35
+
36
+ end # Srf
37
+ end # Sequest
38
+ end # Ms
39
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-08 00:00:00 -06:00
12
+ date: 2009-12-03 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -67,6 +67,7 @@ files:
67
67
  - lib/ms/sequest.rb
68
68
  - lib/ms/sequest/sqt.rb
69
69
  - lib/ms/sequest/params.rb
70
+ - lib/ms/sequest/srf/search/tap.rb
70
71
  - lib/ms/sequest/srf/sqt.rb
71
72
  - lib/ms/sequest/srf/search.rb
72
73
  - lib/ms/sequest/srf.rb
@@ -102,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
102
103
  requirements: []
103
104
 
104
105
  rubyforge_project: mspire
105
- rubygems_version: 1.3.2
106
+ rubygems_version: 1.3.5
106
107
  signing_key:
107
108
  specification_version: 3
108
109
  summary: An mspire library supporting SEQUEST, Bioworks, SQT, etc