mspire 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/lib/spec/mzxml/parser.rb
CHANGED
@@ -285,47 +285,6 @@ class Spec::MzXML::Parser
|
|
285
285
|
# in progress
|
286
286
|
end
|
287
287
|
|
288
|
-
# first, converts backslash to forward slash in filename.
|
289
|
-
# if .mzXML returns the filename
|
290
|
-
# if .raw or .RAW converts the file to .mzXML and returns mzXML filename
|
291
|
-
# if no recognized extension, looks for .mzXML file, then .RAW file (and
|
292
|
-
# converts)
|
293
|
-
# aborts if file was not able to be converted
|
294
|
-
def file_to_mzxml(file)
|
295
|
-
file.gsub!("\\",'/')
|
296
|
-
old_file = file.dup
|
297
|
-
if file =~ /\.mzXML$/
|
298
|
-
return file
|
299
|
-
elsif file =~ /(\.RAW)|(\.raw)$/
|
300
|
-
old_file = file.dup
|
301
|
-
## t2x outputs in cwd (so go to the directory of the file!)
|
302
|
-
dir = File.dirname(file)
|
303
|
-
basename = File.basename(file)
|
304
|
-
Dir.chdir(dir) do
|
305
|
-
cmd = "#{Spec::MzXML::MZXML_CONVERTER} #{basename}"
|
306
|
-
puts cmd
|
307
|
-
puts `#{cmd}`
|
308
|
-
end
|
309
|
-
file.sub!(/\.RAW$/, '.mzXML')
|
310
|
-
file.sub!(/\.raw$/, '.mzXML')
|
311
|
-
unless File.exist? file
|
312
|
-
abort "Couldn't convert #{old_file} to #{file}"
|
313
|
-
end
|
314
|
-
return file
|
315
|
-
else
|
316
|
-
if File.exist?( file + '.mzXML' )
|
317
|
-
return file_to_mzxml(file + '.mzXML')
|
318
|
-
elsif File.exist?( file + '.RAW' )
|
319
|
-
return file_to_mzxml(file + '.RAW')
|
320
|
-
elsif File.exist?( file + '.raw' )
|
321
|
-
return file_to_mzxml(file + '.raw')
|
322
|
-
else
|
323
|
-
return nil
|
324
|
-
end
|
325
|
-
end
|
326
|
-
|
327
|
-
end
|
328
|
-
|
329
288
|
def get_prec_mz_by_scan_for_time_index(file)
|
330
289
|
index = Spec::MSRunIndex.new(file)
|
331
290
|
prec_mz_by_scan = index.scans_by_num.collect do |scan|
|
@@ -356,7 +315,7 @@ class Spec::MzXML::Parser
|
|
356
315
|
return get_prec_mz_by_scan_for_time_index(file)
|
357
316
|
end
|
358
317
|
|
359
|
-
file = file_to_mzxml(file)
|
318
|
+
file = Spec::MzXML.file_to_mzxml(file)
|
360
319
|
|
361
320
|
unless parse_type then parse_type = default_parser end
|
362
321
|
case parse_type
|
@@ -386,7 +345,7 @@ class Spec::MzXML::Parser
|
|
386
345
|
# startMz start_mz
|
387
346
|
# endMz end_mz
|
388
347
|
def basic_info(mzxml_file)
|
389
|
-
puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}"
|
348
|
+
puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
|
390
349
|
hash = {}
|
391
350
|
scan_count_tmp = []
|
392
351
|
(1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
|
data/lib/spec/mzxml.rb
CHANGED
@@ -4,14 +4,62 @@ require 'base64'
|
|
4
4
|
module Spec; end
|
5
5
|
|
6
6
|
module Spec::MzXML
|
7
|
-
|
8
|
-
MZXML_CONVERTER = 't2x'
|
7
|
+
Potential_mzxml_converters = %w(readw.exe readw t2x)
|
9
8
|
|
10
9
|
# takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
|
11
10
|
def strip_time(time)
|
12
11
|
return time[2...-1]
|
13
12
|
end
|
14
13
|
|
14
|
+
# first, converts backslash to forward slash in filename.
|
15
|
+
# if .mzXML returns the filename
|
16
|
+
# if .raw or .RAW converts the file to .mzXML and returns mzXML filename
|
17
|
+
# if no recognized extension, looks for .mzXML file, then .RAW file (and
|
18
|
+
# converts)
|
19
|
+
# aborts if file was not able to be converted
|
20
|
+
# returns nil if a file that can be converted or used was not found
|
21
|
+
def self.file_to_mzxml(file)
|
22
|
+
file.gsub!("\\",'/')
|
23
|
+
old_file = file.dup
|
24
|
+
if file =~ /\.mzXML$/
|
25
|
+
return file
|
26
|
+
elsif file =~ /\.RAW$/i
|
27
|
+
old_file = file.dup
|
28
|
+
## t2x outputs in cwd (so go to the directory of the file!)
|
29
|
+
dir = File.dirname(file)
|
30
|
+
basename = File.basename(file)
|
31
|
+
converter = Spec::MzXML.find_mzxml_converter
|
32
|
+
Dir.chdir(dir) do
|
33
|
+
if converter =~ /readw/
|
34
|
+
cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
|
35
|
+
else
|
36
|
+
cmd = "#{converter} #{basename}"
|
37
|
+
end
|
38
|
+
#puts cmd
|
39
|
+
#puts `#{cmd}`
|
40
|
+
reply = `#{cmd}`
|
41
|
+
puts reply if $VERBOSE
|
42
|
+
end
|
43
|
+
file.sub!(/\.RAW$/i, '.mzXML')
|
44
|
+
unless File.exist? file
|
45
|
+
abort "Couldn't convert #{old_file} to #{file}"
|
46
|
+
end
|
47
|
+
return file
|
48
|
+
else
|
49
|
+
if File.exist?( file + '.mzXML' )
|
50
|
+
return file_to_mzxml(file + '.mzXML')
|
51
|
+
elsif File.exist?( file + '.RAW' )
|
52
|
+
return file_to_mzxml(file + '.RAW')
|
53
|
+
elsif File.exist?( file + '.raw' )
|
54
|
+
return file_to_mzxml(file + '.raw')
|
55
|
+
else
|
56
|
+
return nil
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
|
15
63
|
|
16
64
|
# takes a base64 peaks string and returns an array of [m/z,intens] doublets
|
17
65
|
# mzXML as network ordered
|
@@ -42,5 +90,20 @@ module Spec::MzXML
|
|
42
90
|
b64d.unpack(unpack_code)
|
43
91
|
end
|
44
92
|
|
93
|
+
# Searches each PATH directory and returns the first converter it finds
|
94
|
+
# returns nil if none found
|
95
|
+
def self.find_mzxml_converter
|
96
|
+
ENV['PATH'].split(/[:;]/).each do |path|
|
97
|
+
Dir.chdir(path) do
|
98
|
+
Potential_mzxml_converters.each do |pc|
|
99
|
+
if File.exist? pc
|
100
|
+
return File.join(path, pc)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
nil
|
106
|
+
end
|
107
|
+
|
45
108
|
|
46
109
|
end
|
data/lib/spec_id/aa_freqs.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'fasta'
|
2
2
|
|
3
|
+
module SpecID ; end
|
4
|
+
|
3
5
|
class SpecID::AAFreqs
|
4
6
|
# a fasta object
|
5
7
|
attr_accessor :fasta
|
@@ -77,16 +79,17 @@ class SpecID::AAFreqs
|
|
77
79
|
end
|
78
80
|
|
79
81
|
# pep_objs respond to sequence?
|
82
|
+
# also takes a hash of peptides keyed on :aaseq
|
80
83
|
def actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
else
|
87
|
-
abort v.sequence.to_s + " could not be matched!"
|
84
|
+
if pep_objs.is_a? Hash
|
85
|
+
seqs = pep_objs.keys
|
86
|
+
else
|
87
|
+
seqs = pep_objs.map do |v|
|
88
|
+
v.aaseq
|
88
89
|
end
|
89
90
|
end
|
91
|
+
@aafreqs ||= {}
|
92
|
+
@aafreqs[:C] = cyst_freq
|
90
93
|
actual_and_expected_number(seqs, :C, 1)
|
91
94
|
end
|
92
95
|
|
data/lib/spec_id/bioworks.rb
CHANGED
@@ -6,13 +6,17 @@ require 'spec_id'
|
|
6
6
|
require 'zlib'
|
7
7
|
require 'hash_by'
|
8
8
|
require 'set_from_hash'
|
9
|
+
require 'array_class'
|
9
10
|
|
10
11
|
## have to pre-declare some guys
|
11
|
-
|
12
|
-
|
12
|
+
module SpecID; end
|
13
|
+
module SpecID::Prot; end
|
14
|
+
module SpecID::Pep; end
|
13
15
|
module SpecIDXML; end
|
14
16
|
|
15
|
-
class
|
17
|
+
class Bioworks
|
18
|
+
include SpecID
|
19
|
+
|
16
20
|
# Regular expressions
|
17
21
|
@@bioworksinfo_re = /<bioworksinfo>(.*)<\/bioworksinfo>/o
|
18
22
|
@@modifications_re = /<modifications>(.*)<\/modifications>/o
|
@@ -21,10 +25,9 @@ class SpecID::Bioworks
|
|
21
25
|
@@origfilepath_re = /<origfilepath>(.*)<\/origfilepath>/o
|
22
26
|
|
23
27
|
|
24
|
-
attr_accessor :prots, :version, :global_filename, :origfilename, :origfilepath
|
28
|
+
attr_accessor :peps, :prots, :version, :global_filename, :origfilename, :origfilepath
|
25
29
|
# a string of modifications e.g., "(M* +15.99491) (S@ +14.9322) "
|
26
30
|
attr_accessor :modifications
|
27
|
-
attr_writer :peps
|
28
31
|
|
29
32
|
def hi_prob_best ; false end
|
30
33
|
|
@@ -127,6 +130,7 @@ class SpecID::Bioworks
|
|
127
130
|
# note that each pep will contain its original prot it belongs to, even
|
128
131
|
# though the parallel protein actually represents the proteins it belongs
|
129
132
|
# to.
|
133
|
+
# assumes that each peptide points to all its proteins in pep.prots
|
130
134
|
def _uniq_peps_by_sequence_charge(peps)
|
131
135
|
new_arr = []
|
132
136
|
prot_arr = []
|
@@ -134,11 +138,11 @@ class SpecID::Bioworks
|
|
134
138
|
(0...peps.size).each do |i|
|
135
139
|
next if index_accounted_for.include?(i)
|
136
140
|
new_arr << peps[i]
|
137
|
-
prot_arr.push(
|
141
|
+
prot_arr.push( peps[i].prots )
|
138
142
|
((i+1)...peps.size).each do |j|
|
139
143
|
pep1, pep2 = peps[i], peps[j]
|
140
144
|
if pep1.sequence == pep2.sequence && pep1.charge == pep2.charge
|
141
|
-
prot_arr.last.push pep2.
|
145
|
+
prot_arr.last.push( *(pep2.prots) )
|
142
146
|
index_accounted_for << j
|
143
147
|
end
|
144
148
|
end
|
@@ -149,13 +153,14 @@ class SpecID::Bioworks
|
|
149
153
|
def initialize(file=nil)
|
150
154
|
@peps = nil
|
151
155
|
if file
|
156
|
+
@filename = file
|
152
157
|
parse_xml(file)
|
153
158
|
#parse_xml_by_xmlparser(file)
|
154
159
|
end
|
155
160
|
end
|
156
161
|
|
157
162
|
def parse_xml_by_xmlparser(file)
|
158
|
-
parser =
|
163
|
+
parser = Bioworks::XMLParser.new
|
159
164
|
File.open(file) do |fh|
|
160
165
|
#3.times do fh.gets end ## TEMPFIX
|
161
166
|
parser.parse(fh)
|
@@ -165,23 +170,6 @@ class SpecID::Bioworks
|
|
165
170
|
@prots = parser.prots
|
166
171
|
end
|
167
172
|
|
168
|
-
|
169
|
-
# Returns the list of all peptide hits. A given sequence/charge or scan
|
170
|
-
# may be redundant!
|
171
|
-
def peps
|
172
|
-
if @peps
|
173
|
-
return @peps
|
174
|
-
else
|
175
|
-
@peps = []
|
176
|
-
prots.each do |prot|
|
177
|
-
prot.peps.each do |pep|
|
178
|
-
@peps << pep
|
179
|
-
end
|
180
|
-
end
|
181
|
-
return @peps
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
173
|
# This is highly specific to Bioworks 3.2 xml export. In other words,
|
186
174
|
# unless the newlines, etc. are duplicated, this parser will fail! Not
|
187
175
|
# robust, but it is faster than xmlparser (which is based on the speedy
|
@@ -200,21 +188,23 @@ class SpecID::Bioworks
|
|
200
188
|
end
|
201
189
|
@version = get_regex_val(fh, @@bioworksinfo_re)
|
202
190
|
@modifications = get_regex_val(fh, @@modifications_re)
|
203
|
-
@prots =
|
191
|
+
@prots, @peps = get_prots_from_xml_stream(fh)
|
204
192
|
fh.close
|
205
193
|
end
|
206
194
|
|
207
|
-
|
195
|
+
## returns proteins and peptides
|
196
|
+
def get_prots_from_xml_stream(fh)
|
197
|
+
uniq_pephit_hash = {}
|
208
198
|
prots = []
|
209
199
|
while line = fh.gets
|
210
200
|
if line =~ @@protein_re
|
211
|
-
prot =
|
201
|
+
prot = Bioworks::Prot.new
|
212
202
|
prot.bioworks = self
|
213
|
-
prot.set_from_xml_stream(fh,
|
203
|
+
prot.set_from_xml_stream(fh, uniq_pephit_hash)
|
214
204
|
prots << prot
|
215
205
|
end
|
216
206
|
end
|
217
|
-
prots
|
207
|
+
[prots, uniq_pephit_hash.values]
|
218
208
|
end
|
219
209
|
|
220
210
|
# gets the regex and stops (and rewinds if it hits a protein)
|
@@ -246,7 +236,7 @@ end
|
|
246
236
|
|
247
237
|
# Implements fast parsing via XMLParser (wrapper around Expat)
|
248
238
|
# It is actually slower (about 25% slower) than regular expression parsing
|
249
|
-
class
|
239
|
+
class Bioworks::XMLParser < XMLParser
|
250
240
|
@@at = '@'
|
251
241
|
attr_accessor :prots
|
252
242
|
|
@@ -262,18 +252,18 @@ class SpecID::Bioworks::XMLParser < XMLParser
|
|
262
252
|
case name
|
263
253
|
when "peptide"
|
264
254
|
curr_prot = @current_obj
|
265
|
-
if @current_obj.class ==
|
255
|
+
if @current_obj.class == Bioworks::Prot
|
266
256
|
@current_obj.set_from_xml_hash_xmlparser(@current_hash)
|
267
257
|
else
|
268
258
|
curr_prot = @current_obj.prot ## unless previous was a peptide
|
269
259
|
end
|
270
|
-
peptide =
|
260
|
+
peptide = Bioworks::Pep.new
|
271
261
|
peptide.prot = curr_prot
|
272
262
|
curr_prot.peps << peptide
|
273
263
|
@current_obj = peptide
|
274
264
|
@current_hash = {}
|
275
265
|
when "protein"
|
276
|
-
@current_obj =
|
266
|
+
@current_obj = Bioworks::Prot.new
|
277
267
|
@current_hash = {}
|
278
268
|
@prots << @current_obj
|
279
269
|
else
|
@@ -297,13 +287,14 @@ class SpecID::Bioworks::XMLParser < XMLParser
|
|
297
287
|
|
298
288
|
end
|
299
289
|
|
300
|
-
module
|
290
|
+
module Bioworks::XML
|
301
291
|
# The regular expression to grab attributes from the bioworks xml format
|
302
292
|
@@att_re = /<([\w]+)>(.*)<\/[\w]+>/o
|
303
293
|
end
|
304
294
|
|
305
|
-
class
|
306
|
-
include SpecID::
|
295
|
+
class Bioworks::Prot
|
296
|
+
include SpecID::Prot
|
297
|
+
include Bioworks::XML
|
307
298
|
|
308
299
|
@@end_prot_re = /<\/protein>/o
|
309
300
|
@@pep_re = /<peptide>/o
|
@@ -323,15 +314,32 @@ class SpecID::Bioworks::Prot < SpecID::Prot
|
|
323
314
|
end
|
324
315
|
end
|
325
316
|
|
326
|
-
def set_from_xml_stream(fh,
|
317
|
+
def set_from_xml_stream(fh, uniq_pephit_hash)
|
327
318
|
hash = {}
|
319
|
+
@peps = []
|
328
320
|
while line = fh.gets
|
329
321
|
if line =~ @@att_re
|
330
322
|
hash[$1] = $2
|
331
323
|
elsif line =~ @@pep_re
|
332
|
-
|
333
|
-
|
324
|
+
## Could do a look ahead to grab the file and sequence to check
|
325
|
+
## uniqueness to increase speed here.
|
326
|
+
pep = Bioworks::Pep.new.set_from_xml_stream(fh)
|
327
|
+
# normal search results files have a global filename
|
328
|
+
# while multi-consensus do not
|
329
|
+
pep[12] ||= bioworks.global_filename
|
330
|
+
|
331
|
+
## figure out uniqueness
|
332
|
+
ky = [pep.base_name, pep.first_scan, pep.charge, pep.sequence]
|
333
|
+
if uniq_pephit_hash.key? ky
|
334
|
+
pep = uniq_pephit_hash[ky]
|
335
|
+
else
|
336
|
+
## insert the new protein
|
337
|
+
pep.prots = []
|
338
|
+
uniq_pephit_hash[ky] = pep
|
339
|
+
end
|
340
|
+
pep.prots << self
|
334
341
|
@peps << pep
|
342
|
+
|
335
343
|
elsif line =~ @@end_prot_re
|
336
344
|
set_from_xml_hash(hash)
|
337
345
|
break
|
@@ -367,9 +375,12 @@ class SpecID::Bioworks::Prot < SpecID::Prot
|
|
367
375
|
end
|
368
376
|
end
|
369
377
|
|
378
|
+
Bioworks::Pep = ArrayClass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
|
379
|
+
# 0=sequence 1=mass 2=deltamass 3=charge 4=xcorr 5=deltacn 6=sp 7=rsp 8=ions 9=count 10=tic 11=prots 12=base_name 13=first_scan 14=last_scan 15=peptide_probability 16=file 17=_num_prots 18=_first_prot 19=aaseq
|
370
380
|
|
371
|
-
class
|
372
|
-
include SpecID::
|
381
|
+
class Bioworks::Pep
|
382
|
+
include SpecID::Pep
|
383
|
+
include Bioworks::XML
|
373
384
|
include SpecIDXML
|
374
385
|
|
375
386
|
@@file_split_first_re = /, /o
|
@@ -380,53 +391,18 @@ class SpecID::Bioworks::Pep < Array
|
|
380
391
|
@@file_mult_scan_re = /(.*), (\d+) - (\d+)/o
|
381
392
|
## NOTE! the mass is really the theoretical MH+!!!!
|
382
393
|
## NOTE! ALL values stored as strings, except peptide_probability!
|
383
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
384
|
-
ind_keys = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prot => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18 }
|
385
|
-
|
386
|
-
|
387
|
-
def sequence ; self[0] end ; def sequence=(oth) ; self[0] = oth end
|
388
|
-
def mass ; self[1] end ; def mass=(oth) ; self[1] = oth end
|
389
|
-
def deltamass ; self[2] end ; def deltamass=(oth) ; self[2] = oth end
|
390
|
-
def charge ; self[3] end ; def charge=(oth) ; self[3] = oth end
|
391
|
-
def xcorr ; self[4] end ; def xcorr=(oth) ; self[4] = oth end
|
392
|
-
def deltacn ; self[5] end ; def deltacn=(oth) ; self[5] = oth end
|
393
|
-
def sp ; self[6] end ; def sp=(oth) ; self[6] = oth end
|
394
|
-
def rsp ; self[7] end ; def rsp=(oth) ; self[7] = oth end
|
395
|
-
def ions ; self[8] end ; def ions=(oth) ; self[8] = oth end
|
396
|
-
def count ; self[9] end ; def count=(oth) ; self[9] = oth end
|
397
|
-
def tic ; self[10] end ; def tic=(oth) ; self[10] = oth end
|
398
|
-
def prot ; self[11] end ; def prot=(oth) ; self[11] = oth end
|
399
|
-
def base_name ; self[12] end ; def base_name=(oth) ; self[12] = oth end
|
400
|
-
def first_scan ; self[13] end ; def first_scan=(oth) ; self[13] = oth end
|
401
|
-
def last_scan ; self[14] end ; def last_scan=(oth) ; self[14] = oth end
|
402
|
-
def peptide_probability ; self[15] end ; def peptide_probability=(oth) ; self[15] = oth end
|
403
|
-
def file ; self[16] end # we define a writer below
|
404
|
-
def _num_prots ; self[17] end ; def _num_prots=(oth) ; self[17] = oth end
|
405
|
-
def _first_prot ; self[18] end ; def _first_prot=(oth) ; self[18] = oth end
|
406
394
|
|
407
395
|
## other accessors:
|
408
396
|
def probability ; self[15] end
|
397
|
+
def mh ; self[1] end
|
409
398
|
|
410
|
-
#
|
411
|
-
#
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
ind_keys.merge!(ind_keys_w_eq)
|
416
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
417
|
-
|
418
|
-
def initialize(args=nil)
|
419
|
-
super(@@arr_size.size)
|
420
|
-
if args
|
421
|
-
if args.is_a? Hash
|
422
|
-
args.each do |k,v|
|
423
|
-
self[@@ind[k]] = v
|
424
|
-
end
|
425
|
-
end
|
426
|
-
end
|
399
|
+
# This is not a true ppm since it should be divided by the actual mh instead
|
400
|
+
# of the theoretical (but it is as close as we can get for this object)
|
401
|
+
def ppm
|
402
|
+
1.0e6 * (self[2].abs/self[1])
|
403
|
+
#1.0e6 * (self.deltamass.abs/self.mh)
|
427
404
|
end
|
428
405
|
|
429
|
-
|
430
406
|
# returns array of values of the attributes given (as symbols)
|
431
407
|
def get(*args)
|
432
408
|
args.collect do |arg|
|
@@ -463,25 +439,30 @@ class SpecID::Bioworks::Pep < Array
|
|
463
439
|
[base_name, first_scan, last_scan]
|
464
440
|
end
|
465
441
|
|
442
|
+
tmp_verb = $VERBOSE
|
443
|
+
$VERBOSE = nil
|
466
444
|
def file=(arg)
|
467
445
|
## Set these vals by index:
|
468
446
|
#puts "AERRG: #{arg}"
|
469
447
|
self[16] = arg
|
470
448
|
self[12,3] = self.class.extract_file_info(arg)
|
471
449
|
end
|
450
|
+
$VERBOSE = tmp_verb
|
472
451
|
|
473
452
|
def inspect
|
474
|
-
"<
|
453
|
+
"<Bioworks::Pep sequence: #{sequence}, mass: #{mass}, deltamass: #{deltamass}, charge: #{charge}, xcorr: #{xcorr}, deltacn: #{deltacn}, prots(count):#{prots.size}, base_name: #{base_name}, first_scan: #{first_scan}, last_scan: #{last_scan}, file: #{file}, peptide_probability: #{peptide_probability}, aaseq:#{aaseq}>"
|
454
|
+
|
455
|
+
|
475
456
|
end
|
476
457
|
|
477
458
|
def set_from_hash(hash)
|
478
459
|
self[0,11] = [hash["sequence"], hash["mass"], hash["deltamass"], hash["charge"], hash["xcorr"], hash["deltacn"], hash["sp"], hash["rsp"], hash["ions"], hash["count"], hash["tic"]]
|
479
460
|
self.file = hash["file"]
|
480
461
|
self[15] = hash["peptide_probability"].to_f
|
462
|
+
self[19] = SpecID::Pep.sequence_to_aaseq(self[0]) ## aaseq
|
481
463
|
end
|
482
464
|
|
483
|
-
def set_from_xml_stream(fh
|
484
|
-
self[11] = prot
|
465
|
+
def set_from_xml_stream(fh)
|
485
466
|
hash = {}
|
486
467
|
while line = fh.gets
|
487
468
|
if line =~ @@att_re
|
@@ -491,7 +472,6 @@ class SpecID::Bioworks::Pep < Array
|
|
491
472
|
elsif line =~ @@end_pep_re
|
492
473
|
set_from_hash(hash)
|
493
474
|
#puts "SELF[12]: #{self[12]}"
|
494
|
-
unless self[12] then self[12] = prot.bioworks.global_filename end
|
495
475
|
#puts "SELF[12]: #{self[12]}"
|
496
476
|
break
|
497
477
|
else
|