mspire 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/lib/spec/mzxml/parser.rb
CHANGED
@@ -285,47 +285,6 @@ class Spec::MzXML::Parser
|
|
285
285
|
# in progress
|
286
286
|
end
|
287
287
|
|
288
|
-
# first, converts backslash to forward slash in filename.
|
289
|
-
# if .mzXML returns the filename
|
290
|
-
# if .raw or .RAW converts the file to .mZXML and returns mzXML filename
|
291
|
-
# if no recognized extension, looks for .mzXML file, then .RAW file (and
|
292
|
-
# converts)
|
293
|
-
# aborts if file was not able to be converted
|
294
|
-
def file_to_mzxml(file)
|
295
|
-
file.gsub!("\\",'/')
|
296
|
-
old_file = file.dup
|
297
|
-
if file =~ /\.mzXML$/
|
298
|
-
return file
|
299
|
-
elsif file =~ /(\.RAW)|(\.raw)$/
|
300
|
-
old_file = file.dup
|
301
|
-
## t2x outputs in cwd (so go to the directory of the file!)
|
302
|
-
dir = File.dirname(file)
|
303
|
-
basename = File.basename(file)
|
304
|
-
Dir.chdir(dir) do
|
305
|
-
cmd = "#{Spec::MzXML::MZXML_CONVERTER} #{basename}"
|
306
|
-
puts cmd
|
307
|
-
puts `#{cmd}`
|
308
|
-
end
|
309
|
-
file.sub!(/\.RAW$/, '.mzXML')
|
310
|
-
file.sub!(/\.raw$/, '.mzXML')
|
311
|
-
unless File.exist? file
|
312
|
-
abort "Couldn't convert #{old_file} to #{file}"
|
313
|
-
end
|
314
|
-
return file
|
315
|
-
else
|
316
|
-
if File.exist?( file + '.mzXML' )
|
317
|
-
return file_to_mzxml(file + '.mzXML')
|
318
|
-
elsif File.exist?( file + '.RAW' )
|
319
|
-
return file_to_mzxml(file + '.RAW')
|
320
|
-
elsif File.exist?( file + '.raw' )
|
321
|
-
return file_to_mzxml(file + '.raw')
|
322
|
-
else
|
323
|
-
return nil
|
324
|
-
end
|
325
|
-
end
|
326
|
-
|
327
|
-
end
|
328
|
-
|
329
288
|
def get_prec_mz_by_scan_for_time_index(file)
|
330
289
|
index = Spec::MSRunIndex.new(file)
|
331
290
|
prec_mz_by_scan = index.scans_by_num.collect do |scan|
|
@@ -356,7 +315,7 @@ class Spec::MzXML::Parser
|
|
356
315
|
return get_prec_mz_by_scan_for_time_index(file)
|
357
316
|
end
|
358
317
|
|
359
|
-
file = file_to_mzxml(file)
|
318
|
+
file = Spec::MzXML.file_to_mzxml(file)
|
360
319
|
|
361
320
|
unless parse_type then parse_type = default_parser end
|
362
321
|
case parse_type
|
@@ -386,7 +345,7 @@ class Spec::MzXML::Parser
|
|
386
345
|
# startMz start_mz
|
387
346
|
# endMz end_mz
|
388
347
|
def basic_info(mzxml_file)
|
389
|
-
puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}"
|
348
|
+
puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
|
390
349
|
hash = {}
|
391
350
|
scan_count_tmp = []
|
392
351
|
(1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
|
data/lib/spec/mzxml.rb
CHANGED
@@ -4,14 +4,62 @@ require 'base64'
|
|
4
4
|
module Spec; end
|
5
5
|
|
6
6
|
module Spec::MzXML
|
7
|
-
|
8
|
-
MZXML_CONVERTER = 't2x'
|
7
|
+
Potential_mzxml_converters = %w(readw.exe readw t2x)
|
9
8
|
|
10
9
|
# takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
|
11
10
|
def strip_time(time)
|
12
11
|
return time[2...-1]
|
13
12
|
end
|
14
13
|
|
14
|
+
# first, converts backslash to forward slash in filename.
|
15
|
+
# if .mzXML returns the filename
|
16
|
+
# if .raw or .RAW, converts the file to .mzXML and returns the mzXML filename
|
17
|
+
# if no recognized extension, looks for .mzXML file, then .RAW file (and
|
18
|
+
# converts)
|
19
|
+
# aborts if file was not able to be converted
|
20
|
+
# returns nil if a file that can be converted or used was not found
|
21
|
+
def self.file_to_mzxml(file)
|
22
|
+
file.gsub!("\\",'/')
|
23
|
+
old_file = file.dup
|
24
|
+
if file =~ /\.mzXML$/
|
25
|
+
return file
|
26
|
+
elsif file =~ /\.RAW$/i
|
27
|
+
old_file = file.dup
|
28
|
+
## t2x outputs in cwd (so go to the directory of the file!)
|
29
|
+
dir = File.dirname(file)
|
30
|
+
basename = File.basename(file)
|
31
|
+
converter = Spec::MzXML.find_mzxml_converter
|
32
|
+
Dir.chdir(dir) do
|
33
|
+
if converter =~ /readw/
|
34
|
+
cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
|
35
|
+
else
|
36
|
+
cmd = "#{converter} #{basename}"
|
37
|
+
end
|
38
|
+
#puts cmd
|
39
|
+
#puts `#{cmd}`
|
40
|
+
reply = `#{cmd}`
|
41
|
+
puts reply if $VERBOSE
|
42
|
+
end
|
43
|
+
file.sub!(/\.RAW$/i, '.mzXML')
|
44
|
+
unless File.exist? file
|
45
|
+
abort "Couldn't convert #{old_file} to #{file}"
|
46
|
+
end
|
47
|
+
return file
|
48
|
+
else
|
49
|
+
if File.exist?( file + '.mzXML' )
|
50
|
+
return file_to_mzxml(file + '.mzXML')
|
51
|
+
elsif File.exist?( file + '.RAW' )
|
52
|
+
return file_to_mzxml(file + '.RAW')
|
53
|
+
elsif File.exist?( file + '.raw' )
|
54
|
+
return file_to_mzxml(file + '.raw')
|
55
|
+
else
|
56
|
+
return nil
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
|
15
63
|
|
16
64
|
# takes a base64 peaks string and returns an array of [m/z,intens] doublets
|
17
65
|
# mzXML as network ordered
|
@@ -42,5 +90,20 @@ module Spec::MzXML
|
|
42
90
|
b64d.unpack(unpack_code)
|
43
91
|
end
|
44
92
|
|
93
|
+
# Searches each PATH element and returns the first converter it finds
|
94
|
+
# returns nil if none found
|
95
|
+
def self.find_mzxml_converter
|
96
|
+
ENV['PATH'].split(/[:;]/).each do |path|
|
97
|
+
Dir.chdir(path) do
|
98
|
+
Potential_mzxml_converters.each do |pc|
|
99
|
+
if File.exist? pc
|
100
|
+
return File.join(path, pc)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
nil
|
106
|
+
end
|
107
|
+
|
45
108
|
|
46
109
|
end
|
data/lib/spec_id/aa_freqs.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'fasta'
|
2
2
|
|
3
|
+
module SpecID ; end
|
4
|
+
|
3
5
|
class SpecID::AAFreqs
|
4
6
|
# a fasta object
|
5
7
|
attr_accessor :fasta
|
@@ -77,16 +79,17 @@ class SpecID::AAFreqs
|
|
77
79
|
end
|
78
80
|
|
79
81
|
# pep_objs respond to sequence?
|
82
|
+
# also takes a hash of peptides keyed on :aaseq
|
80
83
|
def actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
else
|
87
|
-
abort v.sequence.to_s + " could not be matched!"
|
84
|
+
if pep_objs.is_a? Hash
|
85
|
+
seqs = pep_objs.keys
|
86
|
+
else
|
87
|
+
seqs = pep_objs.map do |v|
|
88
|
+
v.aaseq
|
88
89
|
end
|
89
90
|
end
|
91
|
+
@aafreqs ||= {}
|
92
|
+
@aafreqs[:C] = cyst_freq
|
90
93
|
actual_and_expected_number(seqs, :C, 1)
|
91
94
|
end
|
92
95
|
|
data/lib/spec_id/bioworks.rb
CHANGED
@@ -6,13 +6,17 @@ require 'spec_id'
|
|
6
6
|
require 'zlib'
|
7
7
|
require 'hash_by'
|
8
8
|
require 'set_from_hash'
|
9
|
+
require 'array_class'
|
9
10
|
|
10
11
|
## have to pre-declare some guys
|
11
|
-
|
12
|
-
|
12
|
+
module SpecID; end
|
13
|
+
module SpecID::Prot; end
|
14
|
+
module SpecID::Pep; end
|
13
15
|
module SpecIDXML; end
|
14
16
|
|
15
|
-
class
|
17
|
+
class Bioworks
|
18
|
+
include SpecID
|
19
|
+
|
16
20
|
# Regular expressions
|
17
21
|
@@bioworksinfo_re = /<bioworksinfo>(.*)<\/bioworksinfo>/o
|
18
22
|
@@modifications_re = /<modifications>(.*)<\/modifications>/o
|
@@ -21,10 +25,9 @@ class SpecID::Bioworks
|
|
21
25
|
@@origfilepath_re = /<origfilepath>(.*)<\/origfilepath>/o
|
22
26
|
|
23
27
|
|
24
|
-
attr_accessor :prots, :version, :global_filename, :origfilename, :origfilepath
|
28
|
+
attr_accessor :peps, :prots, :version, :global_filename, :origfilename, :origfilepath
|
25
29
|
# a string of modifications e.g., "(M* +15.99491) (S@ +14.9322) "
|
26
30
|
attr_accessor :modifications
|
27
|
-
attr_writer :peps
|
28
31
|
|
29
32
|
def hi_prob_best ; false end
|
30
33
|
|
@@ -127,6 +130,7 @@ class SpecID::Bioworks
|
|
127
130
|
# note that each pep will contain its original prot it belongs to, even
|
128
131
|
# though the parallel protein actually represents the proteins it belongs
|
129
132
|
# to.
|
133
|
+
# assumes that each peptide points to all its proteins in pep.prots
|
130
134
|
def _uniq_peps_by_sequence_charge(peps)
|
131
135
|
new_arr = []
|
132
136
|
prot_arr = []
|
@@ -134,11 +138,11 @@ class SpecID::Bioworks
|
|
134
138
|
(0...peps.size).each do |i|
|
135
139
|
next if index_accounted_for.include?(i)
|
136
140
|
new_arr << peps[i]
|
137
|
-
prot_arr.push(
|
141
|
+
prot_arr.push( peps[i].prots )
|
138
142
|
((i+1)...peps.size).each do |j|
|
139
143
|
pep1, pep2 = peps[i], peps[j]
|
140
144
|
if pep1.sequence == pep2.sequence && pep1.charge == pep2.charge
|
141
|
-
prot_arr.last.push pep2.
|
145
|
+
prot_arr.last.push( *(pep2.prots) )
|
142
146
|
index_accounted_for << j
|
143
147
|
end
|
144
148
|
end
|
@@ -149,13 +153,14 @@ class SpecID::Bioworks
|
|
149
153
|
def initialize(file=nil)
|
150
154
|
@peps = nil
|
151
155
|
if file
|
156
|
+
@filename = file
|
152
157
|
parse_xml(file)
|
153
158
|
#parse_xml_by_xmlparser(file)
|
154
159
|
end
|
155
160
|
end
|
156
161
|
|
157
162
|
def parse_xml_by_xmlparser(file)
|
158
|
-
parser =
|
163
|
+
parser = Bioworks::XMLParser.new
|
159
164
|
File.open(file) do |fh|
|
160
165
|
#3.times do fh.gets end ## TEMPFIX
|
161
166
|
parser.parse(fh)
|
@@ -165,23 +170,6 @@ class SpecID::Bioworks
|
|
165
170
|
@prots = parser.prots
|
166
171
|
end
|
167
172
|
|
168
|
-
|
169
|
-
# Returns the list of all peptide hits. A given sequence/charge or scan
|
170
|
-
# may be redundant!
|
171
|
-
def peps
|
172
|
-
if @peps
|
173
|
-
return @peps
|
174
|
-
else
|
175
|
-
@peps = []
|
176
|
-
prots.each do |prot|
|
177
|
-
prot.peps.each do |pep|
|
178
|
-
@peps << pep
|
179
|
-
end
|
180
|
-
end
|
181
|
-
return @peps
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
173
|
# This is highly specific to Bioworks 3.2 xml export. In other words,
|
186
174
|
# unless the newlines, etc. are duplicated, this parser will fail! Not
|
187
175
|
# robust, but it is faster than xmlparser (which is based on the speedy
|
@@ -200,21 +188,23 @@ class SpecID::Bioworks
|
|
200
188
|
end
|
201
189
|
@version = get_regex_val(fh, @@bioworksinfo_re)
|
202
190
|
@modifications = get_regex_val(fh, @@modifications_re)
|
203
|
-
@prots =
|
191
|
+
@prots, @peps = get_prots_from_xml_stream(fh)
|
204
192
|
fh.close
|
205
193
|
end
|
206
194
|
|
207
|
-
|
195
|
+
## returns proteins and peptides
|
196
|
+
def get_prots_from_xml_stream(fh)
|
197
|
+
uniq_pephit_hash = {}
|
208
198
|
prots = []
|
209
199
|
while line = fh.gets
|
210
200
|
if line =~ @@protein_re
|
211
|
-
prot =
|
201
|
+
prot = Bioworks::Prot.new
|
212
202
|
prot.bioworks = self
|
213
|
-
prot.set_from_xml_stream(fh,
|
203
|
+
prot.set_from_xml_stream(fh, uniq_pephit_hash)
|
214
204
|
prots << prot
|
215
205
|
end
|
216
206
|
end
|
217
|
-
prots
|
207
|
+
[prots, uniq_pephit_hash.values]
|
218
208
|
end
|
219
209
|
|
220
210
|
# gets the regex and stops (and rewinds if it hits a protein)
|
@@ -246,7 +236,7 @@ end
|
|
246
236
|
|
247
237
|
# Implements fast parsing via XMLParser (wrapper around Expat)
|
248
238
|
# It is actually slower (about 25% slower) than regular expression parsing
|
249
|
-
class
|
239
|
+
class Bioworks::XMLParser < XMLParser
|
250
240
|
@@at = '@'
|
251
241
|
attr_accessor :prots
|
252
242
|
|
@@ -262,18 +252,18 @@ class SpecID::Bioworks::XMLParser < XMLParser
|
|
262
252
|
case name
|
263
253
|
when "peptide"
|
264
254
|
curr_prot = @current_obj
|
265
|
-
if @current_obj.class ==
|
255
|
+
if @current_obj.class == Bioworks::Prot
|
266
256
|
@current_obj.set_from_xml_hash_xmlparser(@current_hash)
|
267
257
|
else
|
268
258
|
curr_prot = @current_obj.prot ## unless previous was a peptide
|
269
259
|
end
|
270
|
-
peptide =
|
260
|
+
peptide = Bioworks::Pep.new
|
271
261
|
peptide.prot = curr_prot
|
272
262
|
curr_prot.peps << peptide
|
273
263
|
@current_obj = peptide
|
274
264
|
@current_hash = {}
|
275
265
|
when "protein"
|
276
|
-
@current_obj =
|
266
|
+
@current_obj = Bioworks::Prot.new
|
277
267
|
@current_hash = {}
|
278
268
|
@prots << @current_obj
|
279
269
|
else
|
@@ -297,13 +287,14 @@ class SpecID::Bioworks::XMLParser < XMLParser
|
|
297
287
|
|
298
288
|
end
|
299
289
|
|
300
|
-
module
|
290
|
+
module Bioworks::XML
|
301
291
|
# The regular expression to grab attributes from the bioworks xml format
|
302
292
|
@@att_re = /<([\w]+)>(.*)<\/[\w]+>/o
|
303
293
|
end
|
304
294
|
|
305
|
-
class
|
306
|
-
include SpecID::
|
295
|
+
class Bioworks::Prot
|
296
|
+
include SpecID::Prot
|
297
|
+
include Bioworks::XML
|
307
298
|
|
308
299
|
@@end_prot_re = /<\/protein>/o
|
309
300
|
@@pep_re = /<peptide>/o
|
@@ -323,15 +314,32 @@ class SpecID::Bioworks::Prot < SpecID::Prot
|
|
323
314
|
end
|
324
315
|
end
|
325
316
|
|
326
|
-
def set_from_xml_stream(fh,
|
317
|
+
def set_from_xml_stream(fh, uniq_pephit_hash)
|
327
318
|
hash = {}
|
319
|
+
@peps = []
|
328
320
|
while line = fh.gets
|
329
321
|
if line =~ @@att_re
|
330
322
|
hash[$1] = $2
|
331
323
|
elsif line =~ @@pep_re
|
332
|
-
|
333
|
-
|
324
|
+
## Could do a look ahead to grab the file and sequence to check
|
325
|
+
## uniqueness to increase speed here.
|
326
|
+
pep = Bioworks::Pep.new.set_from_xml_stream(fh)
|
327
|
+
# normal search results files have a global filename
|
328
|
+
# while multi-consensus do not
|
329
|
+
pep[12] ||= bioworks.global_filename
|
330
|
+
|
331
|
+
## figure out uniqueness
|
332
|
+
ky = [pep.base_name, pep.first_scan, pep.charge, pep.sequence]
|
333
|
+
if uniq_pephit_hash.key? ky
|
334
|
+
pep = uniq_pephit_hash[ky]
|
335
|
+
else
|
336
|
+
## insert the new protein
|
337
|
+
pep.prots = []
|
338
|
+
uniq_pephit_hash[ky] = pep
|
339
|
+
end
|
340
|
+
pep.prots << self
|
334
341
|
@peps << pep
|
342
|
+
|
335
343
|
elsif line =~ @@end_prot_re
|
336
344
|
set_from_xml_hash(hash)
|
337
345
|
break
|
@@ -367,9 +375,12 @@ class SpecID::Bioworks::Prot < SpecID::Prot
|
|
367
375
|
end
|
368
376
|
end
|
369
377
|
|
378
|
+
Bioworks::Pep = ArrayClass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
|
379
|
+
# 0=sequence 1=mass 2=deltamass 3=charge 4=xcorr 5=deltacn 6=sp 7=rsp 8=ions 9=count 10=tic 11=prots 12=base_name 13=first_scan 14=last_scan 15=peptide_probability 16=file 17=_num_prots 18=_first_prot 19=aaseq
|
370
380
|
|
371
|
-
class
|
372
|
-
include SpecID::
|
381
|
+
class Bioworks::Pep
|
382
|
+
include SpecID::Pep
|
383
|
+
include Bioworks::XML
|
373
384
|
include SpecIDXML
|
374
385
|
|
375
386
|
@@file_split_first_re = /, /o
|
@@ -380,53 +391,18 @@ class SpecID::Bioworks::Pep < Array
|
|
380
391
|
@@file_mult_scan_re = /(.*), (\d+) - (\d+)/o
|
381
392
|
## NOTE! the mass is really the theoretical MH+!!!!
|
382
393
|
## NOTE! ALL values stored as strings, except peptide_probability!
|
383
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
384
|
-
ind_keys = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prot => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18 }
|
385
|
-
|
386
|
-
|
387
|
-
def sequence ; self[0] end ; def sequence=(oth) ; self[0] = oth end
|
388
|
-
def mass ; self[1] end ; def mass=(oth) ; self[1] = oth end
|
389
|
-
def deltamass ; self[2] end ; def deltamass=(oth) ; self[2] = oth end
|
390
|
-
def charge ; self[3] end ; def charge=(oth) ; self[3] = oth end
|
391
|
-
def xcorr ; self[4] end ; def xcorr=(oth) ; self[4] = oth end
|
392
|
-
def deltacn ; self[5] end ; def deltacn=(oth) ; self[5] = oth end
|
393
|
-
def sp ; self[6] end ; def sp=(oth) ; self[6] = oth end
|
394
|
-
def rsp ; self[7] end ; def rsp=(oth) ; self[7] = oth end
|
395
|
-
def ions ; self[8] end ; def ions=(oth) ; self[8] = oth end
|
396
|
-
def count ; self[9] end ; def count=(oth) ; self[9] = oth end
|
397
|
-
def tic ; self[10] end ; def tic=(oth) ; self[10] = oth end
|
398
|
-
def prot ; self[11] end ; def prot=(oth) ; self[11] = oth end
|
399
|
-
def base_name ; self[12] end ; def base_name=(oth) ; self[12] = oth end
|
400
|
-
def first_scan ; self[13] end ; def first_scan=(oth) ; self[13] = oth end
|
401
|
-
def last_scan ; self[14] end ; def last_scan=(oth) ; self[14] = oth end
|
402
|
-
def peptide_probability ; self[15] end ; def peptide_probability=(oth) ; self[15] = oth end
|
403
|
-
def file ; self[16] end # we define a writer below
|
404
|
-
def _num_prots ; self[17] end ; def _num_prots=(oth) ; self[17] = oth end
|
405
|
-
def _first_prot ; self[18] end ; def _first_prot=(oth) ; self[18] = oth end
|
406
394
|
|
407
395
|
## other accessors:
|
408
396
|
def probability ; self[15] end
|
397
|
+
def mh ; self[1] end
|
409
398
|
|
410
|
-
#
|
411
|
-
#
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
ind_keys.merge!(ind_keys_w_eq)
|
416
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
417
|
-
|
418
|
-
def initialize(args=nil)
|
419
|
-
super(@@arr_size.size)
|
420
|
-
if args
|
421
|
-
if args.is_a? Hash
|
422
|
-
args.each do |k,v|
|
423
|
-
self[@@ind[k]] = v
|
424
|
-
end
|
425
|
-
end
|
426
|
-
end
|
399
|
+
# This is not a true ppm since it should be divided by the actual mh instead
|
400
|
+
# of the theoretical (but it is as close as we can get for this object)
|
401
|
+
def ppm
|
402
|
+
1.0e6 * (self[2].abs/self[1])
|
403
|
+
#1.0e6 * (self.deltamass.abs/self.mh)
|
427
404
|
end
|
428
405
|
|
429
|
-
|
430
406
|
# returns array of values of the attributes given (as symbols)
|
431
407
|
def get(*args)
|
432
408
|
args.collect do |arg|
|
@@ -463,25 +439,30 @@ class SpecID::Bioworks::Pep < Array
|
|
463
439
|
[base_name, first_scan, last_scan]
|
464
440
|
end
|
465
441
|
|
442
|
+
tmp_verb = $VERBOSE
|
443
|
+
$VERBOSE = nil
|
466
444
|
def file=(arg)
|
467
445
|
## Set these vals by index:
|
468
446
|
#puts "AERRG: #{arg}"
|
469
447
|
self[16] = arg
|
470
448
|
self[12,3] = self.class.extract_file_info(arg)
|
471
449
|
end
|
450
|
+
$VERBOSE = tmp_verb
|
472
451
|
|
473
452
|
def inspect
|
474
|
-
"<
|
453
|
+
"<Bioworks::Pep sequence: #{sequence}, mass: #{mass}, deltamass: #{deltamass}, charge: #{charge}, xcorr: #{xcorr}, deltacn: #{deltacn}, prots(count):#{prots.size}, base_name: #{base_name}, first_scan: #{first_scan}, last_scan: #{last_scan}, file: #{file}, peptide_probability: #{peptide_probability}, aaseq:#{aaseq}>"
|
454
|
+
|
455
|
+
|
475
456
|
end
|
476
457
|
|
477
458
|
def set_from_hash(hash)
|
478
459
|
self[0,11] = [hash["sequence"], hash["mass"], hash["deltamass"], hash["charge"], hash["xcorr"], hash["deltacn"], hash["sp"], hash["rsp"], hash["ions"], hash["count"], hash["tic"]]
|
479
460
|
self.file = hash["file"]
|
480
461
|
self[15] = hash["peptide_probability"].to_f
|
462
|
+
self[19] = SpecID::Pep.sequence_to_aaseq(self[0]) ## aaseq
|
481
463
|
end
|
482
464
|
|
483
|
-
def set_from_xml_stream(fh
|
484
|
-
self[11] = prot
|
465
|
+
def set_from_xml_stream(fh)
|
485
466
|
hash = {}
|
486
467
|
while line = fh.gets
|
487
468
|
if line =~ @@att_re
|
@@ -491,7 +472,6 @@ class SpecID::Bioworks::Pep < Array
|
|
491
472
|
elsif line =~ @@end_pep_re
|
492
473
|
set_from_hash(hash)
|
493
474
|
#puts "SELF[12]: #{self[12]}"
|
494
|
-
unless self[12] then self[12] = prot.bioworks.global_filename end
|
495
475
|
#puts "SELF[12]: #{self[12]}"
|
496
476
|
break
|
497
477
|
else
|