ms-sequest 0.0.15 → 0.0.16

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.15
1
+ 0.0.16
@@ -48,21 +48,26 @@ class Ms::Sequest::Srf
48
48
  end
49
49
 
50
50
  # returns [Sequest::Params object, io position just after the params were parsed], or [nil, nil] if no '[SEQUEST]' section is found
51
- def self.get_sequest_params(filename)
51
+ def self.get_sequest_params_and_finish_pos(filename)
52
52
  # split the file in half and only read the second half (since we can be
53
53
  # confident that the params file will be there!)
54
+
55
+ params = nil
56
+ finish_parsing_io_pos = nil
54
57
  File.open(filename, 'rb') do |handle|
55
58
  halfway = handle.stat.size / 2
56
59
  handle.seek halfway
57
60
  last_half = handle.read
58
- if sequest_start_index = last_half.rindex('[SEQUEST]')
59
- params_start_index = sequest_start_index + halfway
61
+ if sequest_start_from_last_half = last_half.rindex('[SEQUEST]')
62
+ params_start_index = sequest_start_from_last_half + halfway
60
63
  handle.seek(params_start_index)
61
- Ms::Sequest::Params.new.parse_io(handle)
64
+ params = Ms::Sequest::Params.new.parse_io(handle)
65
+ finish_parsing_io_pos = handle.pos
62
66
  else
63
67
  nil # not found
64
68
  end
65
69
  end
70
+ [params, finish_parsing_io_pos]
66
71
  end
67
72
 
68
73
  def dta_start_byte
@@ -91,9 +96,6 @@ class Ms::Sequest::Srf
91
96
  # :read_pephits => true | false (default true)
92
97
  # # will attempt to read peptide hit information (equivalent to .out
93
98
  # # files), otherwise, just reads the dta information.
94
- #
95
- # :params => <path/to/sequest.params> Some srf files do not include
96
- # their sequest params file - include it here if necessary.
97
99
  def initialize(filename=nil, opts={})
98
100
  @peps = []
99
101
 
@@ -172,33 +174,28 @@ class Ms::Sequest::Srf
172
174
  def from_file(filename, opts)
173
175
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
174
176
 
175
- @params =
176
- if opts[:params]
177
- Ms::Sequest::Params.new(opts[:params])
178
- else
179
- Ms::Sequest::Srf.get_sequest_params(filename)
180
- end
177
+ (@params, after_params_io_pos) = Ms::Sequest::Srf.get_sequest_params_and_finish_pos(filename)
178
+ return unless @params
181
179
 
182
180
  dup_references = 0
183
181
  dup_refs_gt_0 = false
184
- if @params
185
- dup_references = @params.print_duplicate_references.to_i
186
- if dup_references == 0
187
- # warn %Q{
188
- #*****************************************************************************
189
- #WARNING: This srf file lists only 1 protein per peptide! (based on the
190
- #print_duplicate_references parameter in the sequest.params file used in its
191
- #creation) So, downstream output will likewise only contain a single protein
192
- #for each peptide hit. In many instances this is OK since downstream programs
193
- #will recalculate protein-to-peptide linkages from the database file anyway.
194
- #For complete protein lists per peptide hit, .srf files must be created with
195
- #print_duplicate_references > 0. HINT: to capture all duplicate references,
196
- #set the sequest parameter 'print_duplicate_references' to 100 or greater.
197
- #*****************************************************************************
198
- # }
199
- else
200
- dup_refs_gt_0 = true
201
- end
182
+
183
+ dup_references = @params.print_duplicate_references.to_i
184
+ if dup_references == 0
185
+ # warn %Q{
186
+ #*****************************************************************************
187
+ #WARNING: This srf file lists only 1 protein per peptide! (based on the
188
+ #print_duplicate_references parameter in the sequest.params file used in its
189
+ #creation) So, downstream output will likewise only contain a single protein
190
+ #for each peptide hit. In many instances this is OK since downstream programs
191
+ #will recalculate protein-to-peptide linkages from the database file anyway.
192
+ #For complete protein lists per peptide hit, .srf files must be created with
193
+ #print_duplicate_references > 0. HINT: to capture all duplicate references,
194
+ #set the sequest parameter 'print_duplicate_references' to 100 or greater.
195
+ #*****************************************************************************
196
+ # }
197
+ else
198
+ dup_refs_gt_0 = true
202
199
  end
203
200
 
204
201
  File.open(filename, 'rb') do |fh|
@@ -225,11 +222,20 @@ class Ms::Sequest::Srf
225
222
  (@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
226
223
  else
227
224
  @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
225
+
228
226
  @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
229
227
  if opts[:read_pephits]
230
228
  # need the params file to know if the duplicate_references is set > 0
231
229
  raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
232
230
  @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
231
+
232
+ # FOR DISPLAY ONLY!
233
+ #@out_files.each do |f|
234
+ # if f.num_hits == 10
235
+ # p f.hits.last
236
+ # end
237
+ #end
238
+
233
239
  if fh.eof?
234
240
  #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
235
241
  @params = nil
@@ -237,20 +243,12 @@ class Ms::Sequest::Srf
237
243
  end
238
244
  end
239
245
  end
240
- start_pos_in_case = fh.pos
241
- @params = Ms::Sequest::Params.new.parse_io(fh)
242
- if @params.nil?
243
- fh.pos = start_pos_in_case
244
- # seek to the index
245
- fh.scanf "\000\000\000\000" do |m|
246
- puts "MATCHING NULLS: "
247
- p m
248
- end
249
- warn "no params file, no index, corrupt file"
250
- else # we have a params file
251
- # This is very sensitive to the grab_params method in sequest params
252
- fh.read(12) ## gap between last params entry and index
253
- end
246
+
247
+ fh.pos = after_params_io_pos
248
+
249
+ # This is very sensitive to the grab_params method in sequest params
250
+ fh.read(12) ## gap between last params entry and index
251
+
254
252
  @index = read_scan_index(fh,@header.num_dta_files)
255
253
  end
256
254
 
@@ -287,6 +285,12 @@ class Ms::Sequest::Srf
287
285
 
288
286
  # returns an index where each entry is [first_scan, last_scan, charge]
289
287
  def read_scan_index(fh, num)
288
+ #string = fh.read(80)
289
+ #puts "STRING: "
290
+ #p string
291
+ #puts string
292
+ #File.open("tmp.tmp",'wb') {|out| out.print string }
293
+ #abort 'her'
290
294
  ind_len = 24
291
295
  index = Array.new(num)
292
296
  unpack_string = 'III'
@@ -294,6 +298,7 @@ class Ms::Sequest::Srf
294
298
  ind_len.times do st << '0' end ## create a 24 byte string to receive data
295
299
  num.times do |i|
296
300
  fh.read(ind_len, st)
301
+ result = st.unpack(unpack_string)
297
302
  index[i] = st.unpack(unpack_string)
298
303
  end
299
304
  index
@@ -39,14 +39,17 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
39
39
  L gi|90111124|ref|NP_414904.2|
40
40
  END
41
41
 
42
- Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
43
- Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
42
+
43
+ module SPEC
44
+ Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
45
+ Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
46
+ end
44
47
 
45
48
  shared 'an srf to sqt converter' do
46
49
 
47
50
  before do
48
51
  @original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
49
- @output = Srf_output
52
+ @output = SPEC::Srf_output
50
53
  end
51
54
 
52
55
  def del(file)
@@ -132,14 +135,14 @@ end
132
135
 
133
136
  describe "programmatic interface srf to sqt" do
134
137
 
135
- @srf = Ms::Sequest::Srf.new(Srf_file)
138
+ @srf = Ms::Sequest::Srf.new(SPEC::Srf_file)
136
139
 
137
- @basic_conversion = lambda { @srf.to_sqt(Srf_output) }
138
- @with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
139
- @update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
140
+ @basic_conversion = lambda { @srf.to_sqt(SPEC::Srf_output) }
141
+ @with_new_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
142
+ @update_the_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
140
143
 
141
144
  before do
142
- @output = Srf_output
145
+ @output = SPEC::Srf_output
143
146
  end
144
147
 
145
148
  behaves_like "an srf to sqt converter"
@@ -170,7 +173,7 @@ describe "command-line interface srf to sqt" do
170
173
  lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
171
174
  end
172
175
 
173
- base_cmd = "#{Srf_file} -o #{Srf_output}"
176
+ base_cmd = "#{SPEC::Srf_file} -o #{SPEC::Srf_output}"
174
177
  @basic_conversion = commandline_lambda(base_cmd)
175
178
  @with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
176
179
  @update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 15
9
- version: 0.0.15
8
+ - 16
9
+ version: 0.0.16
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-08-25 00:00:00 -06:00
17
+ date: 2010-09-02 00:00:00 -06:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency