mspire 0.5.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'merge'
|
2
|
+
require 'strscan'
|
3
|
+
|
4
|
+
module MS ; end
|
5
|
+
module MS::Ident ; end
|
6
|
+
class MS::Ident::Pepxml ; end
|
7
|
+
|
8
|
+
class MS::Ident::Pepxml::SampleEnzyme
|
9
|
+
include Merge
|
10
|
+
# an identifier
|
11
|
+
attr_accessor :name
|
12
|
+
# amino acids after which to cleave
|
13
|
+
attr_accessor :cut
|
14
|
+
# cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
|
15
|
+
attr_accessor :no_cut
|
16
|
+
# 'C' or 'N'
|
17
|
+
attr_accessor :sense
|
18
|
+
|
19
|
+
# Can pass in a name of an enzyme that is recognized (meaning there is a
|
20
|
+
# set_<name> method), or
|
21
|
+
# trypsin
|
22
|
+
# For other enzymes, you must set :cut, :no_cut, :name, and :sense yourself.
|
23
|
+
# Builds the enzyme from a recognized enzyme name or from an attribute hash.
#
# arg:: a String naming an enzyme for which a set_<name> method exists
#       (e.g. 'trypsin'); any other value (an empty Hash by default) is
#       handed to merge!.
def initialize(arg={})
  unless arg.is_a?(String)
    merge!(arg)
    return
  end
  @name = arg
  # dispatch to the matching set_<name> method
  send(:"set_#{@name}")
end
|
31
|
+
|
32
|
+
# Configures this enzyme with the trypsin settings: sense 'C', cut 'KR',
# no_cut 'P'.
def set_trypsin
  @cut = 'KR'
  @sense = 'C'
  @no_cut = 'P'
end
|
37
|
+
|
38
|
+
# if an xml builder object is given, it adds to the object and returns the
|
39
|
+
# builder object, otherwise it returns an xml fragment string
|
40
|
+
# Writes a <sample_enzyme> element holding one <specificity> child.
#
# builder:: an xml builder to append to (optional)
# Returns the builder when one was given, otherwise the xml fragment string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  xmlb.sample_enzyme(:name => name) do |enzyme_xml|
    specificity_attrs = { :cut => cut, :no_cut => no_cut, :sense => sense }
    enzyme_xml.specificity(specificity_attrs)
  end
  return builder if builder
  xmlb.doc.root.to_xml
end
|
47
|
+
|
48
|
+
# returns self
|
49
|
+
def from_pepxml_node(node)
|
50
|
+
self.name = node['name']
|
51
|
+
ch = node.child
|
52
|
+
self.cut = ch['cut']
|
53
|
+
self.no_cut= ch['no_cut']
|
54
|
+
self.sense = ch['sense']
|
55
|
+
self
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.from_pepxml_node(node)
|
59
|
+
self.new.from_pepxml_node(node)
|
60
|
+
end
|
61
|
+
|
62
|
+
# takes an amino acid sequence (e.g. PEPTIDE).
|
63
|
+
# returns the number of missed cleavages
|
64
|
+
# Counts the cleavage sites inside +aaseq+ that were left uncut.
#
# A site is a 'cut' residue followed by another residue that is not a
# 'no_cut' residue. The last residue of the sequence is never counted.
#
# aaseq:: amino acid sequence as a String (e.g. 'PEPTIDEK')
# Returns the number of missed cleavages (Integer).
# Raises NotImplementedError when sense is 'N'.
def num_missed_cleavages(aaseq)
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
  # rebuild the cached regex whenever cut/no_cut have changed
  regex_key = "#{@cut}|#{@no_cut}"
  unless @num_missed_cleavages_regex && @num_missed_cleavages_regex_key == regex_key
    # Lookaheads leave the following residue unconsumed, so each of several
    # adjacent cut residues (e.g. 'KR') is examined on its own.
    follower =
      if @no_cut && @no_cut != ''
        "(?=[^#{@no_cut}])"
      else
        '(?!\z)'
      end
    @num_missed_cleavages_regex = /[#{@cut}]#{follower}/
    @num_missed_cleavages_regex_key = regex_key
  end
  aaseq.scan(@num_missed_cleavages_regex).size
end
|
83
|
+
|
84
|
+
# No arguments should contain non-standard amino acids
|
85
|
+
# Counts how many of the peptide's two termini (0, 1 or 2) are consistent
# with cleavage by this enzyme.
#
# prev_aa:: residue preceding the peptide ('-' counts as a consistent end)
# middle::  the peptide sequence
# next_aa:: residue following the peptide ('-' counts as a consistent end)
#
# A nil or empty flanking residue is treated as "not a cleavage site"
# rather than raising or matching every cut set.
# Raises NotImplementedError when sense is 'N'.
def num_tol_term(prev_aa, middle, next_aa)
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
  no_cut = @no_cut || ''
  peptide = middle.to_s
  # true when the enzyme cleaves between residue +before+ and residue +after+
  cleaved_between = lambda do |before, after|
    before, after = before.to_s, after.to_s
    # String#include?('') is always true, so empty residues must be rejected first
    next false if before.empty? || after.empty?
    @cut.include?(before) && !no_cut.include?(after)
  end
  num_tol = 0
  num_tol += 1 if prev_aa == '-' || cleaved_between.call(prev_aa, peptide[0, 1])
  num_tol += 1 if next_aa == '-' || cleaved_between.call(peptide[-1, 1], next_aa)
  num_tol
end
|
99
|
+
end
|
100
|
+
|
101
|
+
###################################################
|
102
|
+
###################################################
|
103
|
+
###################################################
|
104
|
+
###################################################
|
105
|
+
# This is digestion methodology:
|
106
|
+
|
107
|
+
=begin
|
108
|
+
# returns all peptides of missed cleavages <= 'missed_cleavages'
|
109
|
+
# so 2 missed cleavages will return all no missed cleavage peptides
|
110
|
+
# all 1 missed cleavages and all 2 missed cleavages.
|
111
|
+
# options:
|
112
|
+
def digest(string, missed_cleavages=0, options={})
|
113
|
+
raise NotImplementedError if @sense == 'N'
|
114
|
+
s = StringScanner.new(string)
|
115
|
+
no_cut_regex = Regexp.new("[#{@no_cut}]")
|
116
|
+
regex = Regexp.new("[#{@cut}]")
|
117
|
+
peps = []
|
118
|
+
last_pos = 0
|
119
|
+
current_pep = ''
|
120
|
+
loop do
|
121
|
+
if s.eos?
|
122
|
+
break
|
123
|
+
end
|
124
|
+
m = s.scan_until(regex)
|
125
|
+
if m ## found a cut point
|
126
|
+
last_pos = s.pos
|
127
|
+
# is the next amino acid a no_cut?
|
128
|
+
if string[s.pos,1] =~ no_cut_regex
|
129
|
+
current_pep << m
|
130
|
+
else
|
131
|
+
# cut it
|
132
|
+
current_pep << m
|
133
|
+
peps << current_pep
|
134
|
+
current_pep = ''
|
135
|
+
end
|
136
|
+
else ## didn't find a cut point
|
137
|
+
current_pep << string[last_pos..-1]
|
138
|
+
peps << current_pep
|
139
|
+
break
|
140
|
+
end
|
141
|
+
end
|
142
|
+
## LOOP through and grab each set of missed cleavages from num down to 0
|
143
|
+
all_sets_of_peps = []
|
144
|
+
(0..missed_cleavages).to_a.reverse.each do |num_mc|
|
145
|
+
all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
|
146
|
+
end
|
147
|
+
all_sets_of_peps
|
148
|
+
end
|
149
|
+
|
150
|
+
# takes an array of peptides and returns an array containing 'num' missed
|
151
|
+
# cleavages
|
152
|
+
# DOES NOT contain peptides that contain < num of missed cleavages
|
153
|
+
# (i.e., will not return missed cleavages of 1 or 2 if num == 3)
|
154
|
+
def get_missed_cleavages(ar_of_peptide_seqs, num)
|
155
|
+
(0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
|
156
|
+
ar_of_peptide_seqs[i,num+1].join
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def self.tryptic(string, missed_cleavages=0)
|
161
|
+
self.new("trypsin").digest(string, missed_cleavages)
|
162
|
+
end
|
163
|
+
|
164
|
+
end
|
165
|
+
=end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'ms/fasta'
|
2
|
+
require 'merge'
|
3
|
+
module MS ; end
|
4
|
+
module MS::Ident ; end
|
5
|
+
|
6
|
+
class MS::Ident::Pepxml
|
7
|
+
class SearchDatabase
|
8
|
+
include Merge
|
9
|
+
# required! the local, full path to the protein sequence database
|
10
|
+
attr_accessor :local_path
|
11
|
+
# required! 'AA' or 'NA'
|
12
|
+
attr_accessor :seq_type
|
13
|
+
|
14
|
+
# optional
|
15
|
+
attr_accessor :database_name
|
16
|
+
# optional
|
17
|
+
attr_accessor :orig_database_url
|
18
|
+
# optional
|
19
|
+
attr_accessor :database_release_date
|
20
|
+
# optional
|
21
|
+
attr_accessor :database_release_identifier
|
22
|
+
# optional
|
23
|
+
attr_accessor :size_of_residues
|
24
|
+
|
25
|
+
# takes a hash to fill in values
|
26
|
+
# Takes a hash to fill in values.
#
# hash::                 attribute values, handed to merge!
# get_size_of_residues:: when true and local_path names an existing file,
#                        size_of_residues is computed via set_size_of_residues!
def initialize(hash={}, get_size_of_residues=false)
  merge!(hash)
  # File.exist?(nil) raises TypeError, so require a path before probing it
  if get_size_of_residues && @local_path && File.exist?(@local_path)
    set_size_of_residues!
  end
end
|
32
|
+
|
33
|
+
# returns self for chaining
|
34
|
+
# Resets size_of_residues to 0, then adds the sequence size of every entry
# that MS::Fasta.foreach yields for local_path.
# Returns self for chaining.
def set_size_of_residues!
  @size_of_residues = 0
  MS::Fasta.foreach(@local_path) {|fasta_entry| @size_of_residues += fasta_entry.sequence.size }
  self
end
|
41
|
+
|
42
|
+
# Adds a <search_database> element to builder, carrying every attribute
# whose value is set (nil/false values are left out).
# Returns the builder.
def to_xml(builder)
  attribute_names = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues]
  attrs = {}
  attribute_names.each do |key|
    value = send(key)
    attrs[key] = value if value
  end
  builder.search_database(attrs)
  builder
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'andand'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module MS ; end
|
5
|
+
module MS::Ident ; end
|
6
|
+
class MS::Ident::Pepxml ; end
|
7
|
+
class MS::Ident::Pepxml::SearchHit ; end
|
8
|
+
|
9
|
+
|
10
|
+
# Positions and masses of modifications
|
11
|
+
MS::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do
|
12
|
+
## Should be something like this:
|
13
|
+
# <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
|
14
|
+
# <mod_aminoacid_mass position=" " mass=" "/>
|
15
|
+
# </modification_info>
|
16
|
+
# e.g.:
|
17
|
+
# <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
|
18
|
+
# <mod_aminoacid_mass position="2" mass="545.7160"/>
|
19
|
+
# <mod_aminoacid_mass position="3" mass="147.1926"/>
|
20
|
+
# </modification_info>
|
21
|
+
|
22
|
+
# Mass of modified N terminus
|
23
|
+
#attr_accessor :mod_nterm_mass
|
24
|
+
# Mass of modified C terminus
|
25
|
+
#attr_accessor :mod_cterm_mass
|
26
|
+
# Peptide sequence (with indicated modifications) I'm assuming that the
|
27
|
+
# native sequest indicators are OK here
|
28
|
+
#attr_accessor :modified_peptide
|
29
|
+
|
30
|
+
# These are objects of type: ...ModAminoacidMass
|
31
|
+
# position ranges from 1 to peptide length
|
32
|
+
#attr_accessor :mod_aminoacid_masses
|
33
|
+
|
34
|
+
# Accepts either the usual ordered struct values or a single Hash keyed by
# member name; a Hash is converted to ordered values before calling super.
def initialize(*args)
  first = args.first
  args = first.values_at(*members) if first.is_a?(Hash)
  super(*args)
end
|
40
|
+
|
41
|
+
# Will escape any xml special chars in modified_peptide
|
42
|
+
# Writes a <modification_info> element with one child per entry in
# mod_aminoacid_masses.
#
# Only the attributes that are set (mod_nterm_mass, mod_cterm_mass,
# modified_peptide) are written.
#
# builder:: an xml builder to append to (optional)
# Returns the builder when one was given, otherwise the xml fragment string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  attr_names = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
  # keep the compacted result: Hash[] rejects nil entries
  attrs = attr_names.map {|at| v = send(at) ; [at, v] if v }.compact
  xmlb.modification_info(Hash[attrs]) do |mod_xml|
    (mod_aminoacid_masses || []).each do |mod_aa_mass|
      mod_aa_mass.to_xml(mod_xml)
    end
  end
  builder || xmlb.doc.root.to_s
end
|
55
|
+
|
56
|
+
def self.from_pepxml_node(node)
|
57
|
+
self.new.from_pepxml_node(node)
|
58
|
+
end
|
59
|
+
|
60
|
+
# returns self
|
61
|
+
def from_pepxml_node(node)
|
62
|
+
self[0] = node['modified_peptide']
|
63
|
+
self[2] = node['mod_nterm_mass']
|
64
|
+
self[3] = node['mod_cterm_mass']
|
65
|
+
_masses = []
|
66
|
+
node.children do |mass_n|
|
67
|
+
_masses << MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new([mass_n['position'].to_i, mass_n['mass'].to_f])
|
68
|
+
end
|
69
|
+
self.mod_aminoacid_masses = _masses
|
70
|
+
self
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
|
75
|
+
# Adds a <mod_aminoacid_mass> element with the position and mass attributes
# to builder.
# Returns the builder.
def to_xml(builder)
  builder.tap {|xml| xml.mod_aminoacid_mass(:position => position, :mass => mass) }
end
|
79
|
+
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'merge'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module MS ; end
|
6
|
+
module MS::Ident ; end
|
7
|
+
|
8
|
+
|
9
|
+
class MS::Ident::Pepxml
|
10
|
+
|
11
|
+
class MS::Ident::Pepxml::SearchHit
|
12
|
+
include Merge
|
13
|
+
|
14
|
+
DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]
|
15
|
+
|
16
|
+
Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])
|
17
|
+
|
18
|
+
class << self
  # replaces the member list returned by .members
  attr_writer :members

  # the member list: the assigned list if there is one, else DEFAULT_MEMBERS
  def members
    return @members if @members
    DEFAULT_MEMBERS
  end
end
|
24
|
+
|
25
|
+
members.each {|memb| attr_accessor memb }
|
26
|
+
|
27
|
+
# rank of the peptide hit (required)
|
28
|
+
attr_accessor :hit_rank
|
29
|
+
# Peptide aminoacid sequence (with no indicated modifications) (required)
|
30
|
+
attr_accessor :peptide
|
31
|
+
|
32
|
+
# Aminoacid preceding peptide ('-' if none)
|
33
|
+
attr_accessor :peptide_prev_aa
|
34
|
+
|
35
|
+
# Aminoacid following peptide (- if none)
|
36
|
+
attr_accessor :peptide_next_aa
|
37
|
+
|
38
|
+
# Number of peptide fragment ions found in spectrum (Integer)
|
39
|
+
attr_accessor :num_matched_ions
|
40
|
+
|
41
|
+
# Number of peptide fragment ions predicted for peptide (Integer)
|
42
|
+
attr_accessor :tot_num_ions
|
43
|
+
|
44
|
+
# (required)
|
45
|
+
attr_accessor :calc_neutral_pep_mass
|
46
|
+
|
47
|
+
# Mass(precursor ion) - Mass(peptide) (required)
|
48
|
+
attr_accessor :massdiff
|
49
|
+
|
50
|
+
# Number of peptide termini consistent with cleavage by sample enzyme
|
51
|
+
attr_accessor :num_tol_term
|
52
|
+
|
53
|
+
# Number of sample enzyme cleavage sites internal to peptide
|
54
|
+
attr_accessor :num_missed_cleavages
|
55
|
+
|
56
|
+
# Potential use in future for user manual validation (true/false)
|
57
|
+
# by default, this will be set to false
|
58
|
+
# (the xml is expressed as a 0 or 1)
|
59
|
+
attr_accessor :is_rejected
|
60
|
+
|
61
|
+
# a protein identifier string (required)
|
62
|
+
attr_accessor :protein
|
63
|
+
|
64
|
+
# Number of unique proteins in search database containing peptide
|
65
|
+
# (required)
|
66
|
+
attr_accessor :num_tot_proteins
|
67
|
+
|
68
|
+
# Extracted from search database
|
69
|
+
attr_accessor :protein_desc
|
70
|
+
|
71
|
+
attr_accessor :calc_pI
|
72
|
+
attr_accessor :protein_mw
|
73
|
+
|
74
|
+
# a ModificationInfo object
|
75
|
+
attr_accessor :modification_info
|
76
|
+
|
77
|
+
# a Hash with keys (the score type) and values
|
78
|
+
# (to_xml calls each_pair to generate the xml, so a Struct would also
|
79
|
+
# work)
|
80
|
+
attr_accessor :search_scores
|
81
|
+
|
82
|
+
# a link back to the spectrum_query object
|
83
|
+
attr_accessor :spectrum_query
|
84
|
+
|
85
|
+
|
86
|
+
Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}
|
87
|
+
|
88
|
+
alias_method :aaseq, :peptide
|
89
|
+
alias_method :aaseq=, :peptide=
|
90
|
+
|
91
|
+
# takes either a hash or an ordered list of values to set.
|
92
|
+
# yields the search_scores hash (empty by default) if given a block.
|
93
|
+
# mind that you set the ModificationInfo object as needed.
|
94
|
+
# Takes either a hash (handed to merge!) or an ordered list of values that
# are assigned to the class's members in order.
#
# search_scores starts out as an empty Hash and is yielded to the block,
# if one is given, after the values have been set.
def initialize(*args, &block)
  @search_scores = {}
  if args.first.is_a?(Hash)
    merge!(args.first)
  else
    # Assign only the values actually supplied: zipping against the full
    # member list pads with nil and would wipe out the search_scores default.
    self.class.members.first(args.size).zip(args) do |k,v|
      send("#{k}=", v)
    end
  end
  block.call(@search_scores) if block
end
|
105
|
+
|
106
|
+
# the member list of this object's class
def members
  klass = self.class
  klass.members
end
|
109
|
+
|
110
|
+
# Writes a <search_hit> element: the set values among the first 14 members
# become attributes, followed by the modification_info (if any) and one
# <search_score> child per search_scores pair.
#
# NOTE(review): with DEFAULT_MEMBERS the first 14 members end at
# :protein_desc, so :calc_pI and :protein_mw are never written -- confirm
# that this is intended.
#
# builder:: an xml builder to append to (optional)
# Returns the builder when one was given, otherwise the xml fragment string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  attrs = members[0,14].map {|k| v=send(k) ; [k, v] if v }.compact
  hash_attrs = Hash[attrs]
  # an unset massdiff is skipped like any other unset attribute instead of
  # raising NoMethodError on nil
  massdiff_value = hash_attrs[:massdiff]
  hash_attrs[:massdiff] = massdiff_value.to_plus_minus_string if massdiff_value
  xmlb.search_hit(hash_attrs) do |hit_xml|
    @modification_info.to_xml(hit_xml) if @modification_info
    (@search_scores || {}).each_pair {|k,v| hit_xml.search_score(:name => k, :value => v) }
  end
  builder || xmlb.doc.root.to_xml
end
|
121
|
+
|
122
|
+
# Fills in this hit from the attributes of a search_hit xml node.
#
# Counts and ranks are converted with to_i and masses with to_f, so an
# attribute missing from the node ends up as 0 / 0.0. is_rejected is stored
# as the integer found in the xml. Child elements (modification info,
# search scores) are not read here.
#
# returns self
def from_pepxml_node(node)
  # assign through the accessors: this class is not a Struct, so positional
  # self[i] = ... assignment is not available
  self.hit_rank = node['hit_rank'].to_i
  self.peptide = node['peptide']
  self.peptide_prev_aa = node['peptide_prev_aa']
  self.peptide_next_aa = node['peptide_next_aa']
  self.protein = node['protein'] ## will this be the string?? (yes, for now)
  self.num_tot_proteins = node['num_tot_proteins'].to_i
  self.num_matched_ions = node['num_matched_ions'].to_i
  self.tot_num_ions = node['tot_num_ions'].to_i
  self.calc_neutral_pep_mass = node['calc_neutral_pep_mass'].to_f
  self.massdiff = node['massdiff'].to_f
  self.num_tol_term = node['num_tol_term'].to_i
  self.num_missed_cleavages = node['num_missed_cleavages'].to_i
  self.is_rejected = node['is_rejected'].to_i
  self
end
|
139
|
+
|
140
|
+
Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require 'ms/ident/pepxml/search_hit'
|
4
|
+
|
5
|
+
module MS ; end
|
6
|
+
module MS::Ident ; end
|
7
|
+
class MS::Ident::Pepxml ; end
|
8
|
+
|
9
|
+
class MS::Ident::Pepxml::SearchResult
|
10
|
+
# an array of search_hits
|
11
|
+
attr_accessor :search_hits
|
12
|
+
|
13
|
+
# if block given, then yields an empty search_hits array.
|
14
|
+
# For consistency with other objects, will also take a hash that has the key
|
15
|
+
# :search_hits and the value an array.
|
16
|
+
# search_hits:: an array of search hits, or (for consistency with other
#               objects) a hash whose :search_hits value is that array
# If a block is given, it is called with the stored search_hits.
def initialize(search_hits = [], &block)
  @search_hits = search_hits.is_a?(Hash) ? search_hits[:search_hits] : search_hits
  block.call(@search_hits) unless block.nil?
end
|
23
|
+
|
24
|
+
# Writes a <search_result> element containing every search hit.
#
# builder:: an xml builder to append to (optional)
# Returns the builder when one was given, otherwise the xml fragment string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  # Emit through xmlb: builder is nil when the caller wants a string back.
  # The block keeps an explicit parameter so that it is called with the
  # builder rather than evaluated in the builder's context (assumed
  # Nokogiri behavior for parameterless blocks -- TODO confirm).
  xmlb.search_result do |result_xml|
    search_hits.each {|sh| sh.to_xml(result_xml) }
  end
  builder || xmlb.doc.root.to_xml
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'ms/ident/pepxml/search_database'
|
2
|
+
require 'ms/ident/pepxml/modifications'
|
3
|
+
require 'ms/ident/pepxml/parameters'
|
4
|
+
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'merge'
|
7
|
+
|
8
|
+
module MS ; end
|
9
|
+
module MS::Ident ; end
|
10
|
+
class MS::Ident::Pepxml ; end
|
11
|
+
|
12
|
+
|
13
|
+
# requires these keys:
|
14
|
+
#
|
15
|
+
# :enzyme => a valid enzyme name
|
16
|
+
# :max_num_internal_cleavages => max number of internal cleavages allowed
|
17
|
+
# :min_number_termini => minimum number of termini??
|
18
|
+
class MS::Ident::Pepxml::EnzymaticSearchConstraint < Hash ; end
|
20
|
+
|
21
|
+
class MS::Ident::Pepxml::SearchSummary
|
22
|
+
include Merge
|
23
|
+
|
24
|
+
DEFAULT_SEARCH_ID = '1'
|
25
|
+
|
26
|
+
attr_accessor :base_name
|
27
|
+
# required in v18-19, optional in later versions
|
28
|
+
attr_accessor :out_data_type
|
29
|
+
# required in v18-19, optional in later versions
|
30
|
+
attr_accessor :out_data
|
31
|
+
# by default, "1"
|
32
|
+
attr_accessor :search_id
|
33
|
+
# an array of MS::Ident::Pepxml::Modification objects
|
34
|
+
attr_accessor :modifications
|
35
|
+
# A SearchDatabase object (responds to :local_path and :type)
|
36
|
+
attr_accessor :search_database
|
37
|
+
# the other search parameters as a hash
|
38
|
+
attr_accessor :parameters
|
39
|
+
# the search engine used, SEQUEST, Mascot, Comet, etc.
|
40
|
+
attr_accessor :search_engine
|
41
|
+
# required: 'average' or 'monoisotopic'
|
42
|
+
attr_accessor :precursor_mass_type
|
43
|
+
# required: 'average' or 'monoisotopic'
|
44
|
+
attr_accessor :fragment_mass_type
|
45
|
+
# An EnzymaticSearchConstraint object (at the moment this is merely a hash
|
46
|
+
# with a few required keys
|
47
|
+
attr_accessor :enzymatic_search_constraint
|
48
|
+
|
49
|
+
# Replaces search_database, enzymatic_search_constraint and parameters with
# fresh objects and returns them, together with the current modifications,
# as [search_database, enzymatic_search_constraint, modifications, parameters].
def block_arg
  @search_database = MS::Ident::Pepxml::SearchDatabase.new
  @enzymatic_search_constraint = MS::Ident::Pepxml::EnzymaticSearchConstraint.new
  current_modifications = @modifications
  @parameters = MS::Ident::Pepxml::Parameters.new
  [@search_database, @enzymatic_search_constraint, current_modifications, @parameters]
end
|
56
|
+
|
57
|
+
# initializes modifications to an empty array
|
58
|
+
# Starts with an empty modifications array and the default search id, then
# hands the hash (and any block) to merge!.
def initialize(hash={}, &block)
  @search_id = DEFAULT_SEARCH_ID
  @modifications = []
  merge!(hash, &block)
end
|
63
|
+
|
64
|
+
# Writes a <search_summary> element: the set summary attributes, then the
# search database, the enzymatic search constraint (if any), every
# modification and the parameters (if any).
#
# builder:: an xml builder to append to (optional)
# Returns the builder when one was given, otherwise the xml fragment string.
def to_xml(builder=nil)
  # TODO: out_data and out_data_type are optional in later pepxml versions...
  # should work that in...
  attrs = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
  hash = Hash[ attrs.map {|at| v=send(at) ; [at, v] if v }.compact ]
  xmlb = builder || Nokogiri::XML::Builder.new
  # Emit through xmlb: builder is nil when the caller wants a string back.
  xmlb.search_summary(hash) do |summary_xml|
    search_database.to_xml(summary_xml)
    summary_xml.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
    modifications.each do |mod|
      mod.to_xml(summary_xml)
    end
    parameters.to_xml(summary_xml) if parameters
  end
  builder || xmlb.doc.root.to_xml
end
|
80
|
+
|
81
|
+
def self.from_pepxml_node(node)
|
82
|
+
self.new.from_pepxml_node(node)
|
83
|
+
end
|
84
|
+
|
85
|
+
def from_pepxml_node(node)
|
86
|
+
raise NotImplementedError, "not implemented just yet (just use the raw xml node)"
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'ms/mass'
|
3
|
+
require 'merge'
|
4
|
+
|
5
|
+
require 'ms/ident/pepxml/search_result'
|
6
|
+
|
7
|
+
module MS ; end
|
8
|
+
module MS::Ident ; end
|
9
|
+
class MS::Ident::Pepxml ; end
|
10
|
+
|
11
|
+
# search_specification is a search constraint applied specifically to this query (a String)
|
12
|
+
class MS::Ident::Pepxml::SpectrumQuery
|
13
|
+
include Merge
|
14
|
+
DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]
|
15
|
+
|
16
|
+
class << self
  # replaces the member list returned by .members
  attr_writer :members

  # the member list: the assigned list if there is one, else DEFAULT_MEMBERS
  def members
    return @members if @members
    DEFAULT_MEMBERS
  end
end
|
22
|
+
|
23
|
+
members.each {|memb| attr_accessor memb }
|
24
|
+
|
25
|
+
Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
|
26
|
+
Optional = [:retention_time_sec, :search_specification]
|
27
|
+
|
28
|
+
# takes either a hash or an ordered list of values to set
|
29
|
+
# yields the search_results array (empty by default) if given a block
|
30
|
+
# Takes either a hash (handed to merge!) or an ordered list of values that
# are assigned to the class's members in order.
#
# search_results starts out as an empty Array and is yielded to the block,
# if one is given, after the values have been set.
def initialize(*args, &block)
  @search_results = []
  if args.first.is_a?(Hash)
    merge!(args.first)
  else
    # Assign only the values actually supplied: zipping against the full
    # member list pads with nil and would wipe out the search_results default.
    self.class.members.first(args.size).zip(args) do |k,v|
      send("#{k}=", v)
    end
  end
  block.call(@search_results) if block
end
|
41
|
+
|
42
|
+
# the member list of this object's class
def members
  klass = self.class
  klass.members
end
|
45
|
+
|
46
|
+
############################################################
|
47
|
+
# FOR PEPXML:
|
48
|
+
############################################################
|
49
|
+
# Writes a <spectrum_query> element: the set values among the first 8
# members become attributes and every search result becomes a child.
# retention_time_sec is left out when pepxml_version is 18.
#
# builder:: an xml builder to append to (optional)
# Returns the builder when one was given, otherwise the xml fragment string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  # all through search_specification; compact drops the unset (nil) entries,
  # which Hash[] would otherwise reject
  attrs = members[0, 8].map {|at| v=send(at) ; [at, v] if v }.compact
  attrs_hash = Hash[attrs]
  case pepxml_version
  when 18
    attrs_hash.delete(:retention_time_sec)
  end
  xmlb.spectrum_query(attrs_hash) do |query_xml|
    search_results.each do |search_result|
      search_result.to_xml(query_xml)
    end
  end
  builder || xmlb.doc.root.to_xml
end
|
65
|
+
|
66
|
+
def self.from_pepxml_node(node)
|
67
|
+
self.new.from_pepxml_node(node)
|
68
|
+
end
|
69
|
+
|
70
|
+
def from_pepxml_node(node)
|
71
|
+
@spectrum = node['spectrum']
|
72
|
+
@start_scan = node['start_scan'].to_i
|
73
|
+
@end_scan = node['end_scan'].to_i
|
74
|
+
@precursor_neutral_mass = node['precursor_neutral_mass'].to_f
|
75
|
+
@index = node['index'].to_i
|
76
|
+
@assumed_charge = node['assumed_charge'].to_i
|
77
|
+
self
|
78
|
+
end
|
79
|
+
|
80
|
+
# Returns m_plus_h - h_plus + deltamass.
#
# m_plus_h::  the M+H value
# deltamass:: the mass difference to add
# h_plus::    the value subtracted from m_plus_h (MS::Mass::H_PLUS by default)
def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=MS::Mass::H_PLUS)
  without_h_plus = m_plus_h - h_plus
  without_h_plus + deltamass
end
|
83
|
+
end
|
84
|
+
|
85
|
+
|