ms-sequest 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.15
1
+ 0.0.16
@@ -48,21 +48,26 @@ class Ms::Sequest::Srf
48
48
  end
49
49
 
50
50
  # returns [Sequest::Params object, io position right after the parsed params], or [nil, nil] if no params are found
51
- def self.get_sequest_params(filename)
51
+ def self.get_sequest_params_and_finish_pos(filename)
52
52
  # split the file in half and only read the second half (since we can be
53
53
  # confident that the params file will be there!)
54
+
55
+ params = nil
56
+ finish_parsing_io_pos = nil
54
57
  File.open(filename, 'rb') do |handle|
55
58
  halfway = handle.stat.size / 2
56
59
  handle.seek halfway
57
60
  last_half = handle.read
58
- if sequest_start_index = last_half.rindex('[SEQUEST]')
59
- params_start_index = sequest_start_index + halfway
61
+ if sequest_start_from_last_half = last_half.rindex('[SEQUEST]')
62
+ params_start_index = sequest_start_from_last_half + halfway
60
63
  handle.seek(params_start_index)
61
- Ms::Sequest::Params.new.parse_io(handle)
64
+ params = Ms::Sequest::Params.new.parse_io(handle)
65
+ finish_parsing_io_pos = handle.pos
62
66
  else
63
67
  nil # not found
64
68
  end
65
69
  end
70
+ [params, finish_parsing_io_pos]
66
71
  end
67
72
 
68
73
  def dta_start_byte
@@ -91,9 +96,6 @@ class Ms::Sequest::Srf
91
96
  # :read_pephits => true | false (default true)
92
97
  # # will attempt to read peptide hit information (equivalent to .out
93
98
  # # files), otherwise, just reads the dta information.
94
- #
95
- # :params => <path/to/sequest.params> Some srf files do not include
96
- # their sequest params file - include it here if necessary.
97
99
  def initialize(filename=nil, opts={})
98
100
  @peps = []
99
101
 
@@ -172,33 +174,28 @@ class Ms::Sequest::Srf
172
174
  def from_file(filename, opts)
173
175
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
174
176
 
175
- @params =
176
- if opts[:params]
177
- Ms::Sequest::Params.new(opts[:params])
178
- else
179
- Ms::Sequest::Srf.get_sequest_params(filename)
180
- end
177
+ (@params, after_params_io_pos) = Ms::Sequest::Srf.get_sequest_params_and_finish_pos(filename)
178
+ return unless @params
181
179
 
182
180
  dup_references = 0
183
181
  dup_refs_gt_0 = false
184
- if @params
185
- dup_references = @params.print_duplicate_references.to_i
186
- if dup_references == 0
187
- # warn %Q{
188
- #*****************************************************************************
189
- #WARNING: This srf file lists only 1 protein per peptide! (based on the
190
- #print_duplicate_references parameter in the sequest.params file used in its
191
- #creation) So, downstream output will likewise only contain a single protein
192
- #for each peptide hit. In many instances this is OK since downstream programs
193
- #will recalculate protein-to-peptide linkages from the database file anyway.
194
- #For complete protein lists per peptide hit, .srf files must be created with
195
- #print_duplicate_references > 0. HINT: to capture all duplicate references,
196
- #set the sequest parameter 'print_duplicate_references' to 100 or greater.
197
- #*****************************************************************************
198
- # }
199
- else
200
- dup_refs_gt_0 = true
201
- end
182
+
183
+ dup_references = @params.print_duplicate_references.to_i
184
+ if dup_references == 0
185
+ # warn %Q{
186
+ #*****************************************************************************
187
+ #WARNING: This srf file lists only 1 protein per peptide! (based on the
188
+ #print_duplicate_references parameter in the sequest.params file used in its
189
+ #creation) So, downstream output will likewise only contain a single protein
190
+ #for each peptide hit. In many instances this is OK since downstream programs
191
+ #will recalculate protein-to-peptide linkages from the database file anyway.
192
+ #For complete protein lists per peptide hit, .srf files must be created with
193
+ #print_duplicate_references > 0. HINT: to capture all duplicate references,
194
+ #set the sequest parameter 'print_duplicate_references' to 100 or greater.
195
+ #*****************************************************************************
196
+ # }
197
+ else
198
+ dup_refs_gt_0 = true
202
199
  end
203
200
 
204
201
  File.open(filename, 'rb') do |fh|
@@ -225,11 +222,20 @@ class Ms::Sequest::Srf
225
222
  (@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
226
223
  else
227
224
  @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
225
+
228
226
  @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
229
227
  if opts[:read_pephits]
230
228
  # need the params file to know if the duplicate_references is set > 0
231
229
  raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
232
230
  @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
231
+
232
+ # FOR DISPLAY ONLY!
233
+ #@out_files.each do |f|
234
+ # if f.num_hits == 10
235
+ # p f.hits.last
236
+ # end
237
+ #end
238
+
233
239
  if fh.eof?
234
240
  #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
235
241
  @params = nil
@@ -237,20 +243,12 @@ class Ms::Sequest::Srf
237
243
  end
238
244
  end
239
245
  end
240
- start_pos_in_case = fh.pos
241
- @params = Ms::Sequest::Params.new.parse_io(fh)
242
- if @params.nil?
243
- fh.pos = start_pos_in_case
244
- # seek to the index
245
- fh.scanf "\000\000\000\000" do |m|
246
- puts "MATCHING NULLS: "
247
- p m
248
- end
249
- warn "no params file, no index, corrupt file"
250
- else # we have a params file
251
- # This is very sensitive to the grab_params method in sequest params
252
- fh.read(12) ## gap between last params entry and index
253
- end
246
+
247
+ fh.pos = after_params_io_pos
248
+
249
+ # This is very sensitive to the grab_params method in sequest params
250
+ fh.read(12) ## gap between last params entry and index
251
+
254
252
  @index = read_scan_index(fh,@header.num_dta_files)
255
253
  end
256
254
 
@@ -287,6 +285,12 @@ class Ms::Sequest::Srf
287
285
 
288
286
  # returns an index where each entry is [first_scan, last_scan, charge]
289
287
  def read_scan_index(fh, num)
288
+ #string = fh.read(80)
289
+ #puts "STRING: "
290
+ #p string
291
+ #puts string
292
+ #File.open("tmp.tmp",'wb') {|out| out.print string }
293
+ #abort 'her'
290
294
  ind_len = 24
291
295
  index = Array.new(num)
292
296
  unpack_string = 'III'
@@ -294,6 +298,7 @@ class Ms::Sequest::Srf
294
298
  ind_len.times do st << '0' end ## create a 24 byte string to receive data
295
299
  num.times do |i|
296
300
  fh.read(ind_len, st)
301
+ result = st.unpack(unpack_string)
297
302
  index[i] = st.unpack(unpack_string)
298
303
  end
299
304
  index
@@ -39,14 +39,17 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
39
39
  L gi|90111124|ref|NP_414904.2|
40
40
  END
41
41
 
42
- Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
43
- Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
42
+
43
+ module SPEC
44
+ Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
45
+ Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
46
+ end
44
47
 
45
48
  shared 'an srf to sqt converter' do
46
49
 
47
50
  before do
48
51
  @original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
49
- @output = Srf_output
52
+ @output = SPEC::Srf_output
50
53
  end
51
54
 
52
55
  def del(file)
@@ -132,14 +135,14 @@ end
132
135
 
133
136
  describe "programmatic interface srf to sqt" do
134
137
 
135
- @srf = Ms::Sequest::Srf.new(Srf_file)
138
+ @srf = Ms::Sequest::Srf.new(SPEC::Srf_file)
136
139
 
137
- @basic_conversion = lambda { @srf.to_sqt(Srf_output) }
138
- @with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
139
- @update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
140
+ @basic_conversion = lambda { @srf.to_sqt(SPEC::Srf_output) }
141
+ @with_new_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
142
+ @update_the_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
140
143
 
141
144
  before do
142
- @output = Srf_output
145
+ @output = SPEC::Srf_output
143
146
  end
144
147
 
145
148
  behaves_like "an srf to sqt converter"
@@ -170,7 +173,7 @@ describe "command-line interface srf to sqt" do
170
173
  lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
171
174
  end
172
175
 
173
- base_cmd = "#{Srf_file} -o #{Srf_output}"
176
+ base_cmd = "#{SPEC::Srf_file} -o #{SPEC::Srf_output}"
174
177
  @basic_conversion = commandline_lambda(base_cmd)
175
178
  @with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
176
179
  @update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 15
9
- version: 0.0.15
8
+ - 16
9
+ version: 0.0.16
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-08-25 00:00:00 -06:00
17
+ date: 2010-09-02 00:00:00 -06:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency